diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..1be15eecd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,8 @@ +repos: + +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.0 + hooks: + - id: clang-format + files: \.(c|cxx|cpp|h|hxx|hpp)$ + exclude: ^deps/ diff --git a/_clang-format b/_clang-format index 37a50f367..aef7d8e0f 100644 --- a/_clang-format +++ b/_clang-format @@ -21,31 +21,31 @@ Language: Cpp AccessModifierOffset: -4 AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignConsecutiveMacros: true +AlignConsecutiveAssignments: + Enabled: false +AlignConsecutiveDeclarations: + Enabled: false +AlignConsecutiveMacros: + Enabled: true AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: false +AlignOperands: Align +AlignTrailingComments: + Kind: Never AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false +AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: true AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: Never AllowShortLambdasOnASingleLine: Inline AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: Yes BinPackArguments: true BinPackParameters: true BraceWrapping: AfterCaseLabel: false AfterClass: false - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false AfterFunction: false AfterNamespace: false @@ -59,46 +59,33 @@ BraceWrapping: SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false +BreakAfterReturnType: Automatic BreakBeforeBinaryOperators: None BreakBeforeBraces: Custom BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon 
BreakInheritanceList: BeforeColon BreakStringLiterals: false +BreakTemplateDeclarations: Yes ColumnLimit: 100 -CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 8 ContinuationIndentWidth: 4 Cpp11BracedListStyle: false -DerivePointerAlignment: true +DerivePointerAlignment: false DisableFormat: false FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^' - Priority: 2 - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IncludeIsMainRegex: '([-_](test|unittest))?$' IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 4 IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' +KeepEmptyLines: + AtEndOfFile: false + AtStartOfBlock: false + AtStartOfFile: false MaxEmptyLinesToKeep: 1 NamespaceIndentation: None +PackConstructorInitializers: CurrentLine PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 @@ -108,57 +95,24 @@ PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left -RawStringFormats: - - Language: Cpp - Delimiters: - - cc - - CC - - cpp - - Cpp - - CPP - - 'c++' - - 'C++' - CanonicalDelimiter: '' - BasedOnStyle: google - - Language: TextProto - Delimiters: - - pb - - PB - - proto - - PROTO - EnclosingFunctions: - - EqualsProto - - EquivToProto - - PARSE_PARTIAL_TEXT_PROTO - - PARSE_TEST_PROTO - - PARSE_TEXT_PROTO - - ParseTextOrDie - - ParseTextProtoOrDie - CanonicalDelimiter: '' - BasedOnStyle: google ReflowComments: false -SortIncludes: false -SortUsingDeclarations: false +SortIncludes: Never +SortUsingDeclarations: Never SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true 
+SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 -SpacesInAngles: false +SpacesInAngles: Never SpacesInContainerLiterals: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false +SpacesInParens: Never SpacesInSquareBrackets: false -Standard: Cpp11 -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 1 +Standard: c++17 UseTab: Never ... diff --git a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp index 641ee5ef4..295cc4dea 100644 --- a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp +++ b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp @@ -61,7 +61,7 @@ // // is performed and finally the results are post processed. 
// -void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) { +void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev) { // // Initialize data for Gemm // @@ -89,11 +89,11 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) // Catch asynchronous exceptions for CPU and GPU auto cpu_exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught asynchronous SYCL exception on CPU device during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; @@ -102,11 +102,11 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) std::exit(2); }; auto gpu_exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught asynchronous SYCL exception on GPU device during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; @@ -141,9 +141,9 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) sycl::context cpu_cxt = cpu_queue.get_context(); // allocate on CPU device and copy data from host to SYCL CPU device - float *cpu_A = sycl::malloc_device(sizea * sizeof(float), cpu_queue); - float *cpu_B = sycl::malloc_device(sizeb * sizeof(float), cpu_queue); - float *cpu_C = sycl::malloc_device(sizec * sizeof(float), cpu_queue); + float* cpu_A = sycl::malloc_device(sizea * sizeof(float), cpu_queue); + float* cpu_B = sycl::malloc_device(sizeb * sizeof(float), cpu_queue); + float* cpu_C = sycl::malloc_device(sizec * sizeof(float), cpu_queue); if (!cpu_A || !cpu_B || !cpu_C) { throw 
std::runtime_error("Failed to allocate USM memory."); } @@ -159,9 +159,9 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) sycl::context gpu_cxt = gpu_queue.get_context(); // allocate on GPU device and copy data from host to SYCL GPU device - float *gpu_A = sycl::malloc_device(sizea * sizeof(float), gpu_queue); - float *gpu_B = sycl::malloc_device(sizeb * sizeof(float), gpu_queue); - float *gpu_C = sycl::malloc_device(sizec * sizeof(float), gpu_queue); + float* gpu_A = sycl::malloc_device(sizea * sizeof(float), gpu_queue); + float* gpu_B = sycl::malloc_device(sizeb * sizeof(float), gpu_queue); + float* gpu_C = sycl::malloc_device(sizec * sizeof(float), gpu_queue); if (!gpu_A || !gpu_B || !gpu_C) { throw std::runtime_error("Failed to allocate USM memory."); } @@ -174,11 +174,11 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) // // add oneapi::math::blas::gemm to execution queue cpu_gemm_done = oneapi::math::blas::column_major::gemm( - oneapi::math::backend_selector{ cpu_queue }, transA, transB, m, - n, k, alpha, cpu_A, ldA, cpu_B, ldB, beta, cpu_C, ldC); + oneapi::math::backend_selector{ cpu_queue }, transA, transB, + m, n, k, alpha, cpu_A, ldA, cpu_B, ldB, beta, cpu_C, ldC); gpu_gemm_done = oneapi::math::blas::column_major::gemm( - oneapi::math::backend_selector{ gpu_queue }, transA, transB, m, - n, k, alpha, gpu_A, ldA, gpu_B, ldB, beta, gpu_C, ldC); + oneapi::math::backend_selector{ gpu_queue }, transA, transB, + m, n, k, alpha, gpu_A, ldA, gpu_B, ldB, beta, gpu_C, ldC); // Wait until calculations are done cpu_gemm_done.wait_and_throw(); @@ -260,7 +260,7 @@ void print_example_banner() { // // Main entry point for example. 
// -int main(int argc, char **argv) { +int main(int argc, char** argv) { print_example_banner(); try { @@ -279,13 +279,13 @@ int main(int argc, char **argv) { run_gemm_example(cpu_dev, gpu_dev); std::cout << "BLAS GEMM USM example ran OK on MKLCPU and CUBLAS" << std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during GEMM:"; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/examples/blas/run_time_dispatching/level3/gemm_usm.cpp b/examples/blas/run_time_dispatching/level3/gemm_usm.cpp index 3df710101..1cfef28fb 100644 --- a/examples/blas/run_time_dispatching/level3/gemm_usm.cpp +++ b/examples/blas/run_time_dispatching/level3/gemm_usm.cpp @@ -136,7 +136,7 @@ void run_gemm_example(const sycl::device& dev) { // // add oneapi::math::blas::gemm to execution queue gemm_done = oneapi::math::blas::column_major::gemm(main_queue, transA, transB, m, n, k, alpha, - dev_A, ldA, dev_B, ldB, beta, dev_C, ldC); + dev_A, ldA, dev_B, ldB, beta, dev_C, ldC); // Wait until calculations are done main_queue.wait_and_throw(); diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp index fdad04de2..864e073eb 100644 --- a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp +++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp @@ -83,7 +83,7 @@ void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) // enabling // 1. create descriptors oneapi::math::dft::descriptor + oneapi::math::dft::domain::COMPLEX> desc(static_cast(N)); // 2. 
variadic set_value diff --git a/examples/dft/run_time_dispatching/real_fwd_usm.cpp b/examples/dft/run_time_dispatching/real_fwd_usm.cpp index 5f1f53acf..f674a5c42 100644 --- a/examples/dft/run_time_dispatching/real_fwd_usm.cpp +++ b/examples/dft/run_time_dispatching/real_fwd_usm.cpp @@ -54,7 +54,7 @@ void run_example(const sycl::device& dev) { // 1. create descriptors oneapi::math::dft::descriptor + oneapi::math::dft::domain::REAL> desc(static_cast(N)); // 2. variadic set_value diff --git a/examples/include/example_helper.hpp b/examples/include/example_helper.hpp index 9ed99940c..681ab9142 100644 --- a/examples/include/example_helper.hpp +++ b/examples/include/example_helper.hpp @@ -88,7 +88,7 @@ fp rand_scalar() { } template -void rand_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld) { +void rand_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; if (trans == oneapi::math::transpose::nontrans) { @@ -104,7 +104,7 @@ void rand_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld) { } template -intType generate_sparse_matrix(const intType nx, intType *ia, intType *ja, fp *a, +intType generate_sparse_matrix(const intType nx, intType* ia, intType* ja, fp* a, const intType index = 0) { intType nz = nx, ny = nx; intType nnz = 0; @@ -172,7 +172,7 @@ bool check_result(fp res, fp ref, intType nFlops, intType index) { } template -void free_vec(std::vector &ptr_vec, sycl::queue queue) { +void free_vec(std::vector& ptr_vec, sycl::queue queue) { for (auto ptr : ptr_vec) { sycl::free(ptr, queue); } diff --git a/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp b/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp index f8ee084de..658006938 100644 --- a/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp +++ b/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp @@ -178,8 +178,8 @@ void run_getrs_example(const 
sycl::device& cpu_device, const sycl::device& gpu_d std::int64_t gpu_getrf_scratchpad_size = oneapi::math::lapack::getrf_scratchpad_size( oneapi::math::backend_selector{ gpu_queue }, m, n, lda); std::int64_t gpu_getrs_scratchpad_size = oneapi::math::lapack::getrs_scratchpad_size( - oneapi::math::backend_selector{ gpu_queue }, trans, n, nrhs, - lda, ldb); + oneapi::math::backend_selector{ gpu_queue }, trans, n, + nrhs, lda, ldb); float* gpu_getrf_scratchpad = sycl::malloc_device( gpu_getrf_scratchpad_size * sizeof(float), gpu_device, gpu_context); float* gpu_getrs_scratchpad = sycl::malloc_device( @@ -197,8 +197,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d // cpu_getrf_done = oneapi::math::lapack::getrf( - oneapi::math::backend_selector{ cpu_queue }, m, n, cpu_A, lda, - cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size); + oneapi::math::backend_selector{ cpu_queue }, m, n, cpu_A, + lda, cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size); cpu_getrs_done = oneapi::math::lapack::getrs( oneapi::math::backend_selector{ cpu_queue }, trans, n, nrhs, cpu_A, lda, cpu_ipiv, cpu_B, ldb, cpu_getrs_scratchpad, cpu_getrs_scratchpad_size, @@ -207,8 +207,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d oneapi::math::backend_selector{ gpu_queue }, m, n, gpu_A, lda, gpu_ipiv, gpu_getrf_scratchpad, gpu_getrf_scratchpad_size); gpu_getrs_done = oneapi::math::lapack::getrs( - oneapi::math::backend_selector{ gpu_queue }, trans, n, nrhs, - gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size, + oneapi::math::backend_selector{ gpu_queue }, trans, n, + nrhs, gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size, { gpu_getrf_done }); // Wait until calculations are done diff --git a/examples/lapack/run_time_dispatching/getrs_usm.cpp b/examples/lapack/run_time_dispatching/getrs_usm.cpp index 68a7836e0..c31d96d6e 100644 --- 
a/examples/lapack/run_time_dispatching/getrs_usm.cpp +++ b/examples/lapack/run_time_dispatching/getrs_usm.cpp @@ -146,10 +146,10 @@ void run_getrs_example(const sycl::device& device) { // Execute on device getrf_done = oneapi::math::lapack::getrf(queue, m, n, dev_A, lda, dev_ipiv, getrf_scratchpad, - getrf_scratchpad_size); + getrf_scratchpad_size); getrs_done = oneapi::math::lapack::getrs(queue, trans, n, nrhs, dev_A, lda, dev_ipiv, dev_B, ldb, - getrs_scratchpad, getrs_scratchpad_size, { getrf_done }); + getrs_scratchpad, getrs_scratchpad_size, { getrf_done }); // Wait until calculations are done queue.wait_and_throw(); diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp index a023c0a69..20fc2f3a2 100644 --- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp +++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp @@ -60,7 +60,7 @@ // is performed and finally the results are post processed. 
// template -int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { +int run_sparse_matrix_vector_multiply_example(const sycl::device& cpu_dev) { // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -71,11 +71,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Catch asynchronous exceptions auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL " "exception during sparse::spmv:\n" << e.what() << std::endl; @@ -94,12 +94,12 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { std::size_t sizeia = static_cast(nrows + 1); std::size_t sizevec = static_cast(nrows); - ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), cpu_queue); - ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), cpu_queue); - a = (fp *)sycl::malloc_shared(sizea * sizeof(fp), cpu_queue); - x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); - y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); - z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + ia = (intType*)sycl::malloc_shared(sizeia * sizeof(intType), cpu_queue); + ja = (intType*)sycl::malloc_shared(sizeja * sizeof(intType), cpu_queue); + a = (fp*)sycl::malloc_shared(sizea * sizeof(fp), cpu_queue); + x = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + y = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + z = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); if (!ia || !ja || !a || !x || !y || !z) { throw std::runtime_error("Failed to allocate USM memory"); @@ -114,10 +114,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { z[i] = set_fp_value(fp(0.0)); } - std::vector 
int_ptr_vec; + std::vector int_ptr_vec; int_ptr_vec.push_back(ia); int_ptr_vec.push_back(ja); - std::vector fp_ptr_vec; + std::vector fp_ptr_vec; fp_ptr_vec.push_back(a); fp_ptr_vec.push_back(x); fp_ptr_vec.push_back(y); @@ -143,7 +143,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Create and initialize handle for a Sparse Matrix in CSR format oneapi::math::sparse::matrix_handle_t A_handle = nullptr; oneapi::math::sparse::init_csr_matrix(cpu_selector, &A_handle, nrows, nrows, nnz, - oneapi::math::index_base::zero, ia, ja, a); + oneapi::math::index_base::zero, ia, ja, a); // Create and initialize dense vector handles oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr; @@ -158,17 +158,17 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Allocate external workspace std::size_t workspace_size = 0; oneapi::math::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, - &beta, y_handle, alg, descr, workspace_size); - void *workspace = sycl::malloc_device(workspace_size, cpu_queue); + &beta, y_handle, alg, descr, workspace_size); + void* workspace = sycl::malloc_device(workspace_size, cpu_queue); // Optimize spmv auto ev_opt = - oneapi::math::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, - &beta, y_handle, alg, descr, workspace); + oneapi::math::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, + x_handle, &beta, y_handle, alg, descr, workspace); // Run spmv auto ev_spmv = oneapi::math::sparse::spmv(cpu_selector, transA, &alpha, A_view, A_handle, - x_handle, &beta, y_handle, alg, descr, { ev_opt }); + x_handle, &beta, y_handle, alg, descr, { ev_opt }); // Release handles and descriptor std::vector release_events; @@ -188,7 +188,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Post Processing // - fp *res = y; + fp* res = y; const bool isConj = (transA == 
oneapi::math::transpose::conjtrans); for (intType row = 0; row < nrows; row++) { z[row] *= beta; @@ -254,7 +254,7 @@ void print_example_banner() { // // Main entry point for example // -int main(int /*argc*/, char ** /*argv*/) { +int main(int /*argc*/, char** /*argv*/) { print_example_banner(); try { @@ -269,13 +269,13 @@ int main(int /*argc*/, char ** /*argv*/) { run_sparse_matrix_vector_multiply_example(cpu_dev); std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index ba51a08e8..a6ff30354 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -61,7 +61,7 @@ // is performed and finally the results are post processed. 
// template -int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { +int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) { // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -72,11 +72,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Catch asynchronous exceptions auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL " "exception during sparse::spmv:\n" << e.what() << std::endl; @@ -95,12 +95,12 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::size_t sizevec = static_cast(nrows); auto sizevec_i64 = static_cast(sizevec); - ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); - ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); - a = (fp *)sycl::malloc_shared(sizea * sizeof(fp), main_queue); - x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); - y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); - z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + ia = (intType*)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); + ja = (intType*)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); + a = (fp*)sycl::malloc_shared(sizea * sizeof(fp), main_queue); + x = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + y = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + z = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); if (!ia || !ja || !a || !x || !y || !z) { throw std::runtime_error("Failed to allocate USM memory"); @@ -115,10 +115,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { z[i] = set_fp_value(fp(0.0)); } - std::vector 
int_ptr_vec; + std::vector int_ptr_vec; int_ptr_vec.push_back(ia); int_ptr_vec.push_back(ja); - std::vector fp_ptr_vec; + std::vector fp_ptr_vec; fp_ptr_vec.push_back(a); fp_ptr_vec.push_back(x); fp_ptr_vec.push_back(y); @@ -144,7 +144,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Create and initialize handle for a Sparse Matrix in CSR format oneapi::math::sparse::matrix_handle_t A_handle = nullptr; oneapi::math::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz, - oneapi::math::index_base::zero, ia, ja, a); + oneapi::math::index_base::zero, ia, ja, a); // Create and initialize dense vector handles oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr; @@ -159,17 +159,17 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Allocate external workspace std::size_t workspace_size = 0; oneapi::math::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle, - &beta, y_handle, alg, descr, workspace_size); - void *workspace = sycl::malloc_device(workspace_size, main_queue); + &beta, y_handle, alg, descr, workspace_size); + void* workspace = sycl::malloc_device(workspace_size, main_queue); // Optimize spmv auto ev_opt = oneapi::math::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle, - &beta, y_handle, alg, descr, workspace); + &beta, y_handle, alg, descr, workspace); // Run spmv - auto ev_spmv = oneapi::math::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle, - &beta, y_handle, alg, descr, { ev_opt }); + auto ev_spmv = oneapi::math::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, + x_handle, &beta, y_handle, alg, descr, { ev_opt }); // Release handles and descriptor std::vector release_events; @@ -189,7 +189,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Post Processing // - fp *res = y; + fp* res = y; const bool isConj = (transA == oneapi::math::transpose::conjtrans); for 
(intType row = 0; row < nrows; row++) { z[row] *= beta; @@ -258,7 +258,7 @@ void print_example_banner() { // // Main entry point for example // -int main(int /*argc*/, char ** /*argv*/) { +int main(int /*argc*/, char** /*argv*/) { print_example_banner(); try { @@ -279,13 +279,13 @@ int main(int /*argc*/, char ** /*argv*/) { run_sparse_matrix_vector_multiply_example(dev); std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/include/oneapi/math/bfloat16.hpp b/include/oneapi/math/bfloat16.hpp index ebf267283..21a84eab7 100644 --- a/include/oneapi/math/bfloat16.hpp +++ b/include/oneapi/math/bfloat16.hpp @@ -70,7 +70,7 @@ struct bfloat16 { inline bfloat16(float f); bfloat16(double d) : bfloat16(float(d)) {} template - bfloat16(T i, typename std::enable_if::value>::type *_ = nullptr) + bfloat16(T i, typename std::enable_if::value>::type* _ = nullptr) : bfloat16(float(i)) {} inline operator float() const; @@ -101,115 +101,115 @@ struct bfloat16 { return h; } - friend float operator+(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator+(const bfloat16& h1, const bfloat16& h2) { return float(h1) + float(h2); } - friend float operator-(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator-(const bfloat16& h1, const bfloat16& h2) { return float(h1) - float(h2); } - friend float operator*(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator*(const bfloat16& h1, const bfloat16& h2) { return float(h1) * float(h2); } - friend float 
operator/(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator/(const bfloat16& h1, const bfloat16& h2) { return float(h1) / float(h2); } template friend typename std::enable_if::value, float>::type operator+( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) + float(o); } template friend typename std::enable_if::value, float>::type operator-( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) - float(o); } template friend typename std::enable_if::value, float>::type operator*( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) * float(o); } template friend typename std::enable_if::value, float>::type operator/( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) / float(o); } template friend typename std::enable_if::value, float>::type operator+( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) + float(h); } template friend typename std::enable_if::value, float>::type operator-( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) - float(h); } template friend typename std::enable_if::value, float>::type operator*( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) * float(h); } template friend typename std::enable_if::value, float>::type operator/( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) / float(h); } template friend typename std::enable_if::value, T>::type operator+( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) + o; } template friend typename std::enable_if::value, T>::type operator-( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) - o; } template friend typename std::enable_if::value, T>::type operator*( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return 
float(h) * o; } template friend typename std::enable_if::value, T>::type operator/( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) / o; } template friend typename std::enable_if::value, T>::type operator+( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o + float(h); } template friend typename std::enable_if::value, T>::type operator-( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o - float(h); } template friend typename std::enable_if::value, T>::type operator*( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o * float(h); } template friend typename std::enable_if::value, T>::type operator/( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o / float(h); } template - bfloat16 operator+=(const T &o) { + bfloat16 operator+=(const T& o) { return *this = bfloat16(*this + o); } template - bfloat16 operator-=(const T &o) { + bfloat16 operator-=(const T& o) { return *this = bfloat16(*this - o); } template - bfloat16 operator*=(const T &o) { + bfloat16 operator*=(const T& o) { return *this = bfloat16(*this * o); } template - bfloat16 operator/=(const T &o) { + bfloat16 operator/=(const T& o) { return *this = bfloat16(*this / o); } }; diff --git a/include/oneapi/math/blas.hxx b/include/oneapi/math/blas.hxx index 374585912..cb89703fc 100644 --- a/include/oneapi/math/blas.hxx +++ b/include/oneapi/math/blas.hxx @@ -19,1723 +19,1679 @@ // Buffer APIs -static inline void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void 
asum(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void axpy(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue 
&queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, 
y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpby(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void axpby(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline 
void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { +static inline void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, 
sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } 
-static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result) { detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotc(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result) { 
detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotu(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, 
std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose 
transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, 
std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + 
float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void 
gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + 
sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - int8_t ao, sycl::buffer &b, std::int64_t ldb, - uint8_t bo, float beta, 
sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - uint8_t ao, sycl::buffer &b, std::int64_t ldb, - int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { 
detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - uint8_t ao, sycl::buffer &b, std::int64_t ldb, - uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t 
k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gemv(sycl::queue& queue, transpose trans, 
std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } 
-static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 
1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t 
n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, 
std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, 
sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, 
+static inline void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - 
sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +static inline void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +static inline void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t 
incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - 
std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, +static inline void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +static inline void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, 
incy); } -static inline void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +static inline void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +static inline void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a) { +static inline void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a) { detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a) { +static inline void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a) { detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, 
sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { 
detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, - float s) { +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, float c, float s) { 
detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, - double s) { +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, double c, double s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - double c, double s) { +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, + double s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +static inline void rotg(sycl::queue& queue, 
sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +static inline void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param); } -static inline void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +static inline void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param); } -static inline void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +static inline void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param) { detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param); } -static inline void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m) { +static inline void rotmg(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param); } -static inline void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, 
std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void 
scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void sdsdot(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::sdsdot(get_device_id(queue), queue, n, sb, x, incx, y, incy, result); } -static inline void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, - std::int64_t incy) { +static inline void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, 
- std::int64_t incy) { +static inline void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a) { +static inline void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a) { +static inline void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +static inline void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +static inline void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, 
incy, a); } -static inline void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, 
b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc) { +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda) { +static inline void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, + std::int64_t lda) { detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda) { +static inline void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, + std::int64_t lda) { detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { 
+static inline void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc) { +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, 
a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, - std::int64_t ldc) { +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, - std::int64_t ldc) { +static 
inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, 
alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t 
n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, 
unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmv(sycl::queue &queue, uplo 
upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void 
trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, 
alpha, a, lda, b, ldb); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t 
ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, 
stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t 
batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, 
a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, 
std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { 
detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &ab, std::int64_t lda, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &ab, std::int64_t lda, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +static inline void 
omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, 
sycl::buffer, 1>& b, + std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); @@ -1743,603 +1699,590 @@ static inline void omatadd(sycl::queue &queue, transpose transa, transpose trans // USM APIs -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), 
queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = 
{}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { 
auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t 
stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + 
std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event 
copy(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, 
std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static 
inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}) { auto done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}) { auto 
done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}) { auto done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotu(sycl::queue& queue, 
std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, 
std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, 
incy, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + 
std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, - std::int64_t ldb, sycl::half beta, sycl::half *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, + std::int64_t ldb, sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, 
c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event 
gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, + const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, - double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, + const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch( - sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + 
std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch( - sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, - sycl::half *beta, sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* 
transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + 
float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, 
std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2347,11 +2290,11 @@ static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, trans } static inline sycl::event gemm_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose 
transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2359,226 +2302,219 @@ static inline sycl::event gemm_batch( } static inline sycl::event gemm_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::half *c, std::int64_t 
ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t 
*b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, std::int32_t *c, std::int64_t ldc, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float 
alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue 
&queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - 
std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t 
ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done 
= detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector 
&dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2586,11 +2522,11 @@ static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::i } static inline sycl::event gemv_batch( - sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector 
&dependencies = {}) { + sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2598,1538 +2534,1508 @@ static inline sycl::event gemv_batch( } static inline sycl::event gemv_batch( - sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, 
std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + 
std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, 
dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, - const 
std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const double** a, std::int64_t* lda, + const 
double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline 
sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - 
std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - const 
std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t 
ldc, + const std::vector& dependencies = {}) { auto done = detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector 
&dependencies = {}) { +static inline sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + 
std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const 
std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}) { +static inline 
sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, 
dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector 
&dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) 
{ auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - float c, float s, - const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, + float s, const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + double c, double s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } 
-static inline sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + float* c, std::complex* s, + const 
std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + double* c, std::complex* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float* param, + const std::vector& dependencies = {}) { auto done = detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param, dependencies); return done; } -static inline sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double* param, + const std::vector& dependencies = {}) { auto done = detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param, dependencies); return done; } -static inline sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, + float* param, const std::vector& dependencies = {}) { auto done = detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param, dependencies); return done; } -static inline sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, 
double *x1, - double y1, double *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, + double* param, const std::vector& dependencies = {}) { auto done = detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param, dependencies); return done; } -static inline sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, + std::int64_t incx, + 
const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const 
std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}) { auto done = detail::sdsdot(get_device_id(queue), queue, n, sb, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::spmv(get_device_id(queue), queue, upper_lower, n, 
alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}) { auto done = detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* a, + const std::vector& dependencies = {}) { auto done = detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies = {}) { auto done = detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, 
const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies = {}) { auto done = detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = 
detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, 
std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, 
double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t 
lda, + const std::vector& dependencies = {}) { auto done = detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, 
const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, 
float alpha, const float *a, - std::int64_t lda, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, 
std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, - double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + 
std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } 
-static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, 
std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}) { 
auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, 
upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } 
-static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static 
inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, 
dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, 
diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, 
left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - 
diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, 
side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, 
std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const 
std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, 
b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, double *alpha, - const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch( - sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch( - sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, diag 
*unit_diag, - std::int64_t *m, std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static 
inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, 
transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, 
stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline 
sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, + const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4137,185 +4043,185 @@ static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, tr } static inline sycl::event omatadd_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex *c, + const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - double *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, 
dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue 
&queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +static inline sycl::event 
imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { 
auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; diff --git a/include/oneapi/math/blas/detail/blas_ct_backends.hxx b/include/oneapi/math/blas/detail/blas_ct_backends.hxx index afebb93c3..cd03497d6 100644 --- a/include/oneapi/math/blas/detail/blas_ct_backends.hxx +++ b/include/oneapi/math/blas/detail/blas_ct_backends.hxx @@ -20,2892 +20,2813 @@ // Buffer APIs static inline void syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, - std::complex alpha, 
sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, - transpose 
trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void spr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); static inline void spr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); static inline void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, 
- sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + double beta, sycl::buffer& c, std::int64_t ldc, + 
std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); static inline void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); static inline void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 
1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, float c, float s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s); static inline void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static 
inline void axpy(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); static inline void axpy(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); static inline void axpy_batch(backend_selector selector, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + float alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + double alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + std::complex 
alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void syr2k(backend_selector 
selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void gemv(backend_selector selector, 
transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); static inline void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + float beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, + 
sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, + 
std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void her(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); static inline void her(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); static inline void hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); static inline void hpr(backend_selector selector, uplo 
upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + double alpha, 
sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void spmv(backend_selector selector, uplo upper_lower, - 
std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, 
std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, 
sycl::buffer, 1>& a, std::int64_t lda); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + 
std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, 
std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 
1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); static inline void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + 
std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, 
std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void syr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); static inline void syr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); + 
std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -static inline void rotmg(backend_selector selector, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, - sycl::buffer ¶m); +static inline void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); -static inline void rotmg(backend_selector selector, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, - sycl::buffer ¶m); +static inline void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& 
x1, double y1, + sycl::buffer& param); static inline void tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); static inline void 
copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, 
double beta, - sycl::buffer &c, std::int64_t ldc); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer 
&result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, 
std::int64_t incy, - sycl::buffer &a); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); static inline void spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t 
stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); static inline void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); -static inline void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); +static inline void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); -static inline void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - 
sycl::buffer &s); +static inline void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); static inline void rotg(backend_selector selector, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); static inline void rotg(backend_selector selector, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); static inline void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); static inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static 
inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void symv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void symv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline 
void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - 
sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatcopy(backend_selector selector, transpose trans, - std::int64_t m, 
std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + 
sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); static inline void imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + 
sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs static inline sycl::event syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - float alpha, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + float alpha, float* x, std::int64_t 
incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - double alpha, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + std::complex alpha, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - float alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + float alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - double alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + double alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose 
trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const 
std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event spr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, const std::vector& dependencies = {}); static inline sycl::event spr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, const std::vector& dependencies = {}); static inline sycl::event hpmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hpmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, 
+ std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, 
std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* 
group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, 
std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event her2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + 
const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event her2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event hbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t 
incy, float c, float s, + const std::vector& dependencies = {}); static inline sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); -static inline sycl::event rot(backend_selector selector, std::int64_t n, - float *x, std::int64_t incx, float *y, std::int64_t incy, float c, - float s, const std::vector &dependencies = {}); +static inline sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); static inline sycl::event rot(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, double c, + double s, const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); + float alpha, const float* x, std::int64_t incx, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); + double alpha, const double* x, std::int64_t incx, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex alpha, 
const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies 
= {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + 
std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + float alpha, const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + double alpha, const double* x, std::int64_t incx, const double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex 
alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gerc(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event gerc(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t 
ldc, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector 
&dependencies = {}); + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, 
std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event gemv_batch(backend_selector selector, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event gemv_batch(backend_selector selector, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemv_batch( backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemv_batch( backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch( - backend_selector selector, transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -static inline sycl::event 
dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + 
std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, 
std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, 
+ const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); static inline sycl::event her(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event her(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + 
std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - 
std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, + double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline 
sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, + sycl::half* beta, sycl::half** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, + float* beta, std::int32_t** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); -static inline sycl::event gemm_batch( - backend_selector selector, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, 
- std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, - float *beta, std::int32_t **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const 
double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemm_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -static inline sycl::event gemm_batch( - backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::half *c, std::int64_t ldc, 
std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::half* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t 
*a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, + std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *a, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* a, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* a, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, 
- const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event geru(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event geru(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline 
sycl::event nrm2(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, 
std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& 
dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const bfloat16 *a, std::int64_t lda, - const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event herk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event herk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event ger(backend_selector selector, std::int64_t m, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event ger(backend_selector selector, 
std::int64_t m, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag 
unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag 
*unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, - const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, + int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, + int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, 
int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); static inline sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - 
std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event hemm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event hemm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event hpr2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const 
std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event hpr2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex 
*x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, 
std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t 
lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event syr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const 
float* x, std::int64_t incx, + float* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event syr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event 
trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -static inline sycl::event rotmg(backend_selector selector, float *d1, - float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); +static inline sycl::event rotmg(backend_selector selector, float* d1, float* d2, + float* x1, float y1, float* param, + const std::vector& dependencies = {}); -static inline sycl::event rotmg(backend_selector selector, double *d1, - double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); +static inline sycl::event rotmg(backend_selector selector, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - 
const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex 
*a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const std::complex **x, - std::int64_t 
*incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, 
double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event hemv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t 
incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hemv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose 
transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t *a, int64_t lda, - std::uint8_t ao, const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, - int64_t ldc, const std::int32_t *co, 
const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias(backend_selector selector, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, + std::int8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, + std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::uint8_t* a, int64_t lda, + std::uint8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + 
int64_t k, float alpha, const std::uint8_t* a, int64_t lda, + std::uint8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); static inline sycl::event sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* result, + const 
std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, 
std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); static inline sycl::event spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event 
rotm(backend_selector selector, std::int64_t n, - float *x, std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies = {}); + float* x, std::int64_t incx, float* y, std::int64_t incy, + float* param, const std::vector& dependencies = {}); static inline sycl::event rotm(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, + double* param, const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, float *a, float *b, - float *c, float *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, float* a, float* b, + float* c, float* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, double *a, - double *b, double *c, double *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, double* a, double* b, + double* c, double* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, - std::complex *a, std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, - std::complex *a, std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies = {}); static inline sycl::event sdsdot(backend_selector selector, std::int64_t n, - float sb, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float 
*result, - const std::vector &dependencies = {}); + float sb, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies = {}); static inline sycl::event her2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event her2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event dot(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); static inline sycl::event dot(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* result, + const std::vector& 
dependencies = {}); static inline sycl::event dot(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + double* result, const std::vector& dependencies = {}); static inline sycl::event symv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event symv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, + double alpha, const double* a, std::int64_t lda, + std::int64_t 
stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, 
std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, + std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, + std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, 
std::int64_t ldb, - std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); static inline sycl::event omatadd_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event 
omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const 
std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, 
std::int64_t ldc, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, diff --git a/include/oneapi/math/blas/detail/blas_loader.hxx b/include/oneapi/math/blas/detail/blas_loader.hxx index 9732218de..f37cc32a9 100644 --- a/include/oneapi/math/blas/detail/blas_loader.hxx +++ 
b/include/oneapi/math/blas/detail/blas_loader.hxx @@ -19,2681 +19,2571 @@ // Buffer APIs -ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx); -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx); -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx); -ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void trmv(oneapi::math::device libkey, 
sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); -ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); - -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); 
-ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - 
sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size); -ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); -ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); - -ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue 
&queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> 
&x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); -ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, - float s); -ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, - double s); -ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); -ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); - -ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy); -ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, 
std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx); +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx); +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + 
std::int64_t lda, sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); +ONEMATH_EXPORT void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, 
transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); 
+ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); +ONEMATH_EXPORT void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void syr2k(oneapi::math::device 
libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMATH_EXPORT void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + double beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void 
syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMATH_EXPORT void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMATH_EXPORT void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); + +ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, + float s); +ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, + double s); +ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, float 
c, float s); +ONEMATH_EXPORT void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s); + +ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMATH_EXPORT void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy); + +ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t 
stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMATH_EXPORT void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMATH_EXPORT void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); +ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); +ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, 
sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); + +ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + float beta, sycl::buffer& y, std::int64_t incy, + 
std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, 
std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, + std::int64_t lda); +ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); +ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); + +ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, 
sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); + +ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); + 
+ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue& queue, + sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); +ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue& queue, + sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); + +ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); 
+ +ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); -ONEMATH_EXPORT void 
gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); +ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& 
y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); +ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); +ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, 
std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); +ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + +ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + 
std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); +ONEMATH_EXPORT void symm(oneapi::math::device libkey, 
sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); +ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); +ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + 
uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); +ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, 
std::int64_t incx); +ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); + +ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); + +ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& 
x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); + +ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& 
a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); +ONEMATH_EXPORT void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo 
upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + +ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); +ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); +ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); +ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + +ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); +ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); + +ONEMATH_EXPORT void dot(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); +ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); +ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); + +ONEMATH_EXPORT void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float sb, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + +ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, float beta, sycl::buffer, 1>& c, + std::int64_t ldc); +ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); +ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue, + sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); +ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue, + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); +ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue& queue, + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); + +ONEMATH_EXPORT void 
omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); +ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); +ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); +ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size); +ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size); +ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); +ONEMATH_EXPORT 
void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, + 
sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); +ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); +ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> 
&x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t strideb); +ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t 
stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, - std::int64_t lda); -ONEMATH_EXPORT void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); -ONEMATH_EXPORT void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - -ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - 
sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMATH_EXPORT void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); -ONEMATH_EXPORT void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - 
sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, - sycl::buffer ¶m); -ONEMATH_EXPORT void rotmg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, - sycl::buffer ¶m); - -ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMATH_EXPORT void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMATH_EXPORT void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, 
std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, 
std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); -ONEMATH_EXPORT void syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); -ONEMATH_EXPORT void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMATH_EXPORT void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); -ONEMATH_EXPORT void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); -ONEMATH_EXPORT void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); -ONEMATH_EXPORT void hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - -ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - 
std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMATH_EXPORT void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMATH_EXPORT void 
symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); -ONEMATH_EXPORT void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); -ONEMATH_EXPORT void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); -ONEMATH_EXPORT void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void trmm(oneapi::math::device libkey, 
sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMATH_EXPORT void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMATH_EXPORT void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue &queue, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); -ONEMATH_EXPORT void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMATH_EXPORT void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, 
std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMATH_EXPORT void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMATH_EXPORT void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); -ONEMATH_EXPORT void 
spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - -ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); -ONEMATH_EXPORT void rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - 
-ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); -ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); -ONEMATH_EXPORT void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - -ONEMATH_EXPORT void sdsdot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float sb, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - -ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); -ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); -ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); -ONEMATH_EXPORT void rotg(oneapi::math::device libkey, sycl::queue &queue, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - 
sycl::buffer, 1> &s); - -ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMATH_EXPORT void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size); -ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size); -ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, 
std::int64_t batch_size); -ONEMATH_EXPORT void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - -ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose 
trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); -ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMATH_EXPORT void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, - std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t strideb); + +ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, 
transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb); +ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t strideb); - -ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, - std::int64_t lda, std::int64_t ldb); -ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); -ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb); -ONEMATH_EXPORT void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb); - -ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, - std::int64_t ldc); -ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, 
sycl::buffer &c, - std::int64_t ldc); -ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb); + +ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, + std::int64_t ldc); +ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); +ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMATH_EXPORT void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs -ONEMATH_EXPORT sycl::event herk(oneapi::math::device 
libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event herk(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event 
trmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies = {}); 
-ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t 
lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + 
const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, const float** b, + 
std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + 
transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, 
+ std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event gemm_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const 
std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, + oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event gemm_batch( - 
oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, + oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose 
transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, std::int32_t *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo 
*upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - 
std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - 
std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); 
-ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, 
- std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const 
float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t 
incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event 
gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, std::int32_t* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, 
float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, double beta, double* c, + std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, 
std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); 
+ +ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float* x, std::int64_t incx, float* y, std::int64_t incy, float c, + float s, const std::vector& dependencies = {}); +ONEMATH_EXPORT 
sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, double* y, std::int64_t incy, double c, + double s, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + const double beta, double* y, 
std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, 
+ std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, float 
alpha, + const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, + float beta, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, + double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event gemv_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - 
std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); + oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event gemv_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float *c, std::int64_t ldc, std::int64_t stridec, + oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, + const float** x, 
std::int64_t* incx, float* beta, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, + float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, + 
double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT 
sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* a, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* a, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1, + float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1, + double* d2, double* x1, double y1, double* param, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, 
double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + 
transpose transa, transpose transb, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = 
{}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const 
std::int8_t* a, + int64_t lda, std::int8_t ao, const std::uint8_t* b, + int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, + int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::int8_t* b, + int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, + int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::uint8_t* b, + int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, + int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, + const 
float* a, int64_t lda, int64_t stride_a, float* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, std::complex* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, std::complex* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, 
sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + 
std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + 
transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& 
dependencies = {}); +ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event 
trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device 
libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + 
const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t 
stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, float alpha, const 
float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, 
+ std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float* x, std::int64_t incx, float* y, std::int64_t incy, + float* param, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, double* y, std::int64_t incy, + double* param, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* result, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + double* result, const std::vector& dependencies = {}); + 
+ONEMATH_EXPORT sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float sb, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b, + float* c, float* s, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a, + double* b, double* c, double* s, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, + std::complex* a, std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, + std::complex* a, std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); 
+ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + float alpha, float* ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + double alpha, double* ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event 
imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double *c, std::int64_t ldc, std::int64_t stridec, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, 
std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event 
her(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, const 
std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, float *d1, - float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, double *d1, - double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, 
std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT 
sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd_batch( + oneapi::math::device libkey, 
sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd_batch( + oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, 
sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, std::int64_t stridea, + float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stridea, + double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, + std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, + std::int64_t strideb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, float alpha, + float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, double alpha, + double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, 
sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, - std::int64_t ldb, sycl::half beta, sycl::half *c, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const 
bfloat16 *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_bias( - oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, - std::int8_t ao, const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, - int64_t ldc, const std::int32_t *co, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, - const std::vector 
&dependencies = {}); -ONEMATH_EXPORT sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, 
const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - 
side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, - const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - 
std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hpr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t 
kl, - std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies 
= {}); -ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const 
std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, 
diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy(oneapi::math::device 
libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, 
const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue 
&queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device 
libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double *param, - const 
std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sdsdot(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float sb, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, float *a, - float *b, float *c, float *s, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, double *a, - double *b, double *c, double *s, - const std::vector &dependencies = 
{}); -ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, - std::complex *a, std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, - std::complex *a, std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t 
stride, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t 
m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd_batch( - oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float 
alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, - std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, - std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, - std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, - std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, 
std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - float* alpha, const float** a, std::int64_t* lda, - float** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + 
transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - double* alpha, const double** a, std::int64_t* lda, - double** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - std::complex* alpha, const std::complex** a, - std::int64_t* lda, std::complex** b, - std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - std::complex* alpha, - const std::complex** a, std::int64_t* lda, - std::complex** b, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); 
ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - float* alpha, float** ab, std::int64_t* lda, - std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, float** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - double* alpha, double** ab, std::int64_t* lda, - std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, double** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - std::complex* alpha, std::complex** ab, - std::int64_t* lda, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, - transpose* trans, std::int64_t* m, std::int64_t* n, - std::complex* alpha, std::complex** ab, - std::int64_t* lda, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, 
std::complex** ab, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); diff --git a/include/oneapi/math/blas/detail/cublas/blas_ct.hxx b/include/oneapi/math/blas/detail/cublas/blas_ct.hxx index c9ac759ab..4a0898f9a 100644 --- a/include/oneapi/math/blas/detail/cublas/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/cublas/blas_ct.hxx @@ -18,1991 +18,1955 @@ **************************************************************************/ void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, 
std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, 
lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { 
oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t 
batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, 
ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - 
oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + 
oneapi::math::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), 
upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - 
std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, 
sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + 
sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpby(backend_selector 
selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, 
sycl::buffer &result) { - oneapi::math::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { + oneapi::math::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, + result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - 
std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { 
oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, 
sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t 
stridey, + std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, 
sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - 
batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) 
{ oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { 
oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + 
std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, 
sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { 
oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, 
std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, 
sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, 
a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void gbmv(backend_selector selector, transpose 
trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void 
gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { 
oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, 
std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { 
oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 
1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void 
trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + 
sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t 
stridey, + std::int64_t batch_size) { + oneapi::math::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } 
void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + 
oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> 
&x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { 
oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t 
n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - 
std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> 
&b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { 
oneapi::math::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, 
+ sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, 
std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer 
&a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, 
ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, 
- std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { 
oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const 
double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, 
- dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trmv( + 
selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event 
tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); 
return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + 
std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, 
std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2010,11 +1974,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const 
float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2022,11 +1985,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2034,12 +1996,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( 
selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2047,12 +2008,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2060,679 +2020,665 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector 
&dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - 
const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - 
const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - 
const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - 
std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, 
const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } 
sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); 
return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, 
std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( 
selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( 
selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const 
std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* 
alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event 
dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, 
std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float 
alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, 
std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + 
transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + 
transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* 
alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2740,12 +2686,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t 
stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2753,12 +2698,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2766,13 +2710,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2780,13 +2723,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2794,12 +2736,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half 
*c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2808,10 +2750,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2820,10 +2762,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - 
float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2832,10 +2774,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2843,185 +2785,180 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const 
std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t 
incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - 
const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* 
c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, 
transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const 
sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, 
const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3029,11 +2966,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3041,11 +2978,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, 
std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3053,11 +2990,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3065,89 +3002,87 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, 
lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector 
selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, 
lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3155,11 +3090,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, 
double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3167,11 +3101,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3179,62 +3113,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + 
std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, 
upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3242,779 +3171,759 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } 
sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, 
transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbmv( selector.get_queue(), 
upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - 
std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, 
std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + 
std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, double* d1, double* 
d2, double* x1, + double y1, double* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, 
std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag 
unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - 
const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, 
group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } 
-sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t 
lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event 
gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, 
std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event 
asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, 
std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const 
float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, 
std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, 
float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) 
{ +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, 
dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4022,10 +3931,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4034,9 +3943,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::math::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4045,9 +3954,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4055,18 +3964,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, 
alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4074,9 +3983,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4084,9 +3993,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4094,10 +4003,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch( 
selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4106,10 +4015,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4118,11 +4027,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4131,11 +4040,11 @@ sycl::event 
omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4143,150 +4052,150 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, double alpha, const 
double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event 
imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector 
&dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose 
transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } @@ -4295,9 +4204,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = 
oneapi::math::blas::cublas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4306,9 +4215,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4316,11 +4225,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4328,33 +4236,30 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + 
auto done = oneapi::math::blas::cublas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4364,8 +4269,8 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4375,7 +4280,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, 
lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } diff --git a/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx b/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx index 1141eb238..caa75a646 100644 --- a/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx +++ b/include/oneapi/math/blas/detail/cublas/onemath_blas_cublas.hxx @@ -19,2314 +19,2211 @@ // Buffer APIs -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size); - -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - 
std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, 
std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dot(sycl::queue &queue, std::int64_t 
n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + 
std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& 
queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, + double s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void 
rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &param); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &param); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer &param); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer &param); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue
&queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose 
trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); + +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, 
std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex 
beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, 
std::int64_t batch_size); - -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void 
hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her2(sycl::queue &queue, uplo 
upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, 
std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, 
std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, 
std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, - std::int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer 
&x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, 
std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - 
sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, 
std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo 
upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, 
std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t 
n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, 
std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void 
syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, 
std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void syrk_batch(sycl::queue& 
queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, 
sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t 
m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, 
std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, 
std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset 
offsetc, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t 
ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, 
int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t 
stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + 
sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, 
int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, 
int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); // USM APIs -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t 
incx, float *result, - const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector 
&dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue 
&queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, 
std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - 
-sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* 
group_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const 
std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const 
std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue 
&queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, 
double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, - float *c, std::complex *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, - double *c, std::complex *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, - double *y, std::int64_t incy, double *param, - const std::vector &dependencies = {}); - -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, - float *param, const std::vector &dependencies = {}); - -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, - double *param, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose 
trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, float beta, - float *y, std::int64_t incy, std::int64_t 
stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, double beta, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, 
std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = 
{}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue 
&queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, 
std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, 
uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *a, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *a, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *a, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - double *a, const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo 
upper_lower, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - 
-sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = 
{}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - 
std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex 
alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - 
std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event 
syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, double *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex 
alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, 
const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, 
std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, 
std::int64_t *lda, const double **b, std::int64_t *ldb, - double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t 
*ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, const float *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - 
std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); + +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* 
y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); + +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float* param, + const std::vector& dependencies = {}); + +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, double* param, + const std::vector& dependencies = {}); + +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); + +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event 
scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + 
std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float beta, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, 
const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, 
std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + 
std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex 
alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* 
y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const 
std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, 
+ std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event 
tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, 
+ const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + 
std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + 
+sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double 
alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, 
std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& 
queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event 
trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* 
ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t 
group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, 
std::int64_t stride_b, sycl::half beta, sycl::half* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, 
std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, 
- std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const 
std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, + std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, + std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, 
int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector 
&dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t 
stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, 
int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, 
int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t 
m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex 
alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, diff --git a/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx index 0d34723db..dbc1554fe 100644 --- a/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/mklcpu/blas_ct.hxx @@ -20,1991 +20,1955 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float 
beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void 
scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, 
sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { 
oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, 
b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - 
sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t 
k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, 
stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, 
lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, 
sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { 
oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, 
- y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, double beta, 
sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { - oneapi::math::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { + oneapi::math::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, + result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { 
oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, 
std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, 
transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer 
&a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, 
std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, 
- std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { 
oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, 
std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - 
sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, 
std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, 
sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - 
sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, 
b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, 
std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, 
sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, 
std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, 
std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose 
trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& 
c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, 
alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - 
sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t 
n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + 
oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t 
k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void 
asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, 
unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, 
std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, 
sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), 
left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, 
sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, 
sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t 
n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, 
b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t 
stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, 
std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, 
sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { 
oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + 
dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, 
std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, 
dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, 
alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex 
**c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1976,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1987,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, 
std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1998,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 +2010,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,679 +2022,665 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, 
std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, 
c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, 
std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + 
std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, 
std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + 
std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } 
sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float 
beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector 
selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, 
dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const 
float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, 
std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, 
float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + 
std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, 
std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const 
std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, 
lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, 
group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, 
- x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, 
std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - 
std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const 
std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, 
transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2688,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2755,12 +2700,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const 
std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,13 +2712,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,13 +2725,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t 
ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2796,12 +2738,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2810,10 +2752,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, 
std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2822,10 +2764,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2834,10 +2776,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, 
std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,185 +2787,180 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, 
std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - 
std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event 
nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + 
std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + 
oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float 
alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2968,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t 
ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2980,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2992,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, 
- const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,89 +3004,87 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, 
- const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const 
double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, 
dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3092,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3103,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, 
std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3115,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) 
{ +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + 
std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3244,779 +3173,759 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, 
std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t 
n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t 
lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), 
trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, 
- const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, 
std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const 
std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); 
return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const 
std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trsv( + selector.get_queue(), 
upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::trsv( + 
selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - 
const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, 
std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo 
upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, 
- const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const 
std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex 
*a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, 
dependencies); + x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event 
rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector 
selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* 
a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector 
selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3933,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t 
stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,9 +3945,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4047,9 +3956,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4057,18 +3966,18 @@ 
sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4076,9 +3985,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4086,9 +3995,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, 
std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4096,10 +4005,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4108,10 +4017,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, 
stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4120,11 +4029,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4133,11 +4042,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4145,150 +4054,150 @@ sycl::event 
omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event 
omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex 
*a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, 
double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + 
const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } @@ -4297,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4308,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4318,11 +4227,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, 
std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4330,33 +4238,30 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklcpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - 
std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4366,8 +4271,8 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4377,7 +4282,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } diff --git a/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx index b00317329..7ab1f887c 100644 --- a/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/mklgpu/blas_ct.hxx @@ -20,1991 +20,1955 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { + 
oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - 
sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag 
unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { 
oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - 
std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, 
std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t 
lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void rot(backend_selector selector, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, 
alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + 
std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - 
oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { - oneapi::math::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { + oneapi::math::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, + result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& 
x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo 
upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 
1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, 
sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + 
batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + 
std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), 
upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, 
alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + 
std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, 
std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void 
geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& 
x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, 
std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector 
selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t 
n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, 
std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void 
tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, 
beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), 
left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - 
sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t 
lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, 
std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + 
sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, 
incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + 
oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { 
oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { 
oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, 
std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t 
incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, 
stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { 
oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector 
selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), 
trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, 
sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, 
std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, 
std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, 
c, ldc); + lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } 
sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& 
dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, 
+ const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, 
const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, 
std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + 
std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1976,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1987,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1998,11 @@ sycl::event syrk_batch(backend_selector selector, uplo 
upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 +2010,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,679 +2022,665 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t 
incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, 
std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, 
dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t 
*incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t 
incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } 
sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, 
dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t 
ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const 
std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event 
gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, 
std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose 
*trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, 
a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, 
ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const 
std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done 
= oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = 
oneapi::math::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { 
auto done = oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event 
iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, 
float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( 
selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2688,12 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2756,10 +2702,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto 
done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,10 +2714,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2780,10 +2726,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( 
selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2791,12 +2737,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2804,12 +2749,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2817,13 +2761,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2831,13 +2774,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + 
std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,185 +2787,180 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, 
incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, 
dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - 
std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; 
} sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t 
ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto 
done = - oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2968,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2980,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2992,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done 
= oneapi::math::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,89 +3004,87 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { + auto done = 
oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, 
std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3092,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3103,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + 
std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3115,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, 
a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3244,779 +3173,759 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const 
std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* 
x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, 
std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const 
double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, 
dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), 
upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, 
double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, 
- const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const 
float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, 
incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - 
std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, 
std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - 
const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + 
std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t 
incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3933,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, 
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,9 +3945,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4047,9 +3956,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4057,18 +3966,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, 
std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4076,9 +3985,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4086,9 +3995,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const 
std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4096,10 +4005,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4108,10 +4017,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4120,11 +4029,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event 
omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4133,11 +4042,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4145,150 +4054,150 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const 
float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - 
const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, 
std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, 
m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + 
dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - 
oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } @@ -4297,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4308,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4318,11 +4227,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = 
oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4330,33 +4238,30 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::mklgpu::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) 
{ auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4366,8 +4271,8 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4377,7 +4282,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } diff --git a/include/oneapi/math/blas/detail/netlib/blas_ct.hxx b/include/oneapi/math/blas/detail/netlib/blas_ct.hxx index 1e55eaa94..c1ff9b629 100644 --- a/include/oneapi/math/blas/detail/netlib/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/netlib/blas_ct.hxx @@ -20,1991 +20,1955 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void herk(backend_selector 
selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, 
incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t 
incx) { oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t 
m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, 
std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - 
ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t 
stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, - ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - 
oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, - lda, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + 
sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, 
float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, 
std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, + incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { - oneapi::math::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { + oneapi::math::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, + result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { 
oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, 
alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { 
oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, stridey, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t 
m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { 
oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void 
hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, 
y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& 
c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, 
uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { - oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, - k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { + oneapi::math::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { 
oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& 
c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, 
transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t 
incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + 
oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { - oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, - x, incx, beta, y, incy); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector 
selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void 
symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, 
std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, 
trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t 
incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, 
std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + 
sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, 
std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, - stridey, batch_size); + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { + oneapi::math::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void 
gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + 
oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { - oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { 
oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { 
oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, 
std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t 
incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, b, - ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, 
stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { 
oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector 
selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), 
trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, stride_b, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, 
sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, 
std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, 
std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, 
c, ldc); + lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } 
sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& 
dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, 
+ const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trmv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, 
const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, 
std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + 
std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1976,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1987,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1998,11 @@ sycl::event syrk_batch(backend_selector selector, uplo 
upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 +2010,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,679 +2022,665 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t 
incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, 
std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); + s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, 
dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t 
*incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t 
incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } 
sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, 
dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t 
ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const 
std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event 
gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, 
std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose 
*trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, 
a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, x, incx, beta, y, incy, - group_count, group_size, dependencies); +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::gemv_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, + group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, 
ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const 
std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { - auto done 
= oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { - auto done = 
oneapi::math::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, - a, lda, x, incx, c, ldc, group_count, - group_size, dependencies); +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::dgmm_batch( + selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, + dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { 
auto done = oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event 
iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t 
group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + 
std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const 
std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2688,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2755,12 +2700,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,13 +2712,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,13 +2725,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const 
std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2796,12 +2738,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2810,10 +2752,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, 
std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2822,10 +2764,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2834,10 +2776,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, 
std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,185 +2787,180 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); 
return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + 
oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, 
transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, 
const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = - oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, dependencies); + oneapi::math::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2968,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, 
const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2980,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2992,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, 
std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,89 +3004,87 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + float alpha, const float* x, std::int64_t incx, 
const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, - incy, a, lda, dependencies); + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* 
b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, 
std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3092,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3103,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3115,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, 
float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( 
selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3244,779 +3173,759 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, 
std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + 
std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose 
trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto 
done = oneapi::math::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t 
ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const 
double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, 
dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, - dependencies); +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), 
upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, 
double *x, - std::int64_t incx, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, lda, x, incx, dependencies); + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::trsv( + selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, 
- const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const 
float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, 
incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - 
std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, 
std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - 
const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + 
std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t 
incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3933,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, 
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,10 +3945,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4048,10 +3956,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4059,19 +3966,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, 
std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4079,9 +3985,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4089,9 +3995,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + 
const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4099,10 +4005,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4111,10 +4017,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4123,12 +4029,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event 
omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4137,12 +4042,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4150,150 +4054,150 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const 
float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - 
const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, - a, lda, b, ldb, dependencies); + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, 
std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, 
m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, - ab, lda, ldb, dependencies); + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, + alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + 
dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - 
oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } @@ -4302,9 +4206,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, float** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4313,9 +4217,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* lda, double** b, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4323,11 +4227,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = 
oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } @@ -4335,33 +4238,30 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::netlib::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, + dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) 
{ auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4371,8 +4271,8 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } @@ -4382,7 +4282,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + alpha, ab, lda, ldb, group_count, + groupsize, dependencies); return done; } diff --git a/include/oneapi/math/blas/detail/onemath_blas_backends.hxx b/include/oneapi/math/blas/detail/onemath_blas_backends.hxx index e9fcbbf66..06ea19b75 100644 --- a/include/oneapi/math/blas/detail/onemath_blas_backends.hxx +++ b/include/oneapi/math/blas/detail/onemath_blas_backends.hxx @@ -19,2928 +19,2880 @@ // Buffer APIs -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); - 
-ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); - -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); - -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, double 
alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void hemm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void hemm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, 
std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void herk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, 
std::int64_t ldc); - -ONEMATH_EXPORT void herk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); -ONEMATH_EXPORT void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); -ONEMATH_EXPORT void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + 
std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMATH_EXPORT void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +ONEMATH_EXPORT void symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void hemm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void hemm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); -ONEMATH_EXPORT void her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, +ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, float beta, sycl::buffer, 1> &c, - std::int64_t ldc); + double alpha, 
sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); -ONEMATH_EXPORT void her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, +ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT void trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT void trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb); - -ONEMATH_EXPORT void trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb); - -ONEMATH_EXPORT void trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT 
void trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT void trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb); - -ONEMATH_EXPORT void trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb); - -ONEMATH_EXPORT void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void 
gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMATH_EXPORT void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - 
std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - -ONEMATH_EXPORT void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, 
sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - 
-ONEMATH_EXPORT void hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void her(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - -ONEMATH_EXPORT void her(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void her2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void her2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, - std::int64_t lda); - -ONEMATH_EXPORT void hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -ONEMATH_EXPORT void hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - -ONEMATH_EXPORT void hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - -ONEMATH_EXPORT void hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - -ONEMATH_EXPORT void hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - -ONEMATH_EXPORT void sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void symv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void symv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void syr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void syr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - 
sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void syr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void syr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMATH_EXPORT void spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, - std::int64_t incy); - -ONEMATH_EXPORT void spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void spr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - -ONEMATH_EXPORT void spr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - -ONEMATH_EXPORT void spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - -ONEMATH_EXPORT void spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - -ONEMATH_EXPORT void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - 
oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMATH_EXPORT void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMATH_EXPORT void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMATH_EXPORT void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMATH_EXPORT void tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); - -ONEMATH_EXPORT void tpmv(sycl::queue &queue, oneapi::math::uplo 
upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); - -ONEMATH_EXPORT void tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); - -ONEMATH_EXPORT void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); - -ONEMATH_EXPORT void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void trmv(sycl::queue 
&queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void dotu(sycl::queue &queue, std::int64_t n, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMATH_EXPORT void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMATH_EXPORT void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - -ONEMATH_EXPORT void axpy(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void axpy(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void 
axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - -ONEMATH_EXPORT void axpby(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void axpby(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMATH_EXPORT void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, 
+ sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void herk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void herk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, + double beta, sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + 
oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, float beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMATH_EXPORT void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + 
std::int64_t ldb); + +ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb); + +ONEMATH_EXPORT void trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + 
oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb); + +ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMATH_EXPORT void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMATH_EXPORT void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMATH_EXPORT void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + 
std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, 
std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); + +ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy); -ONEMATH_EXPORT void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy); -ONEMATH_EXPORT void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void 
ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMATH_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, 
std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMATH_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +ONEMATH_EXPORT void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +ONEMATH_EXPORT void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, + std::int64_t lda); + +ONEMATH_EXPORT void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMATH_EXPORT void copy_batch(sycl::queue &queue, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMATH_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMATH_EXPORT void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + +ONEMATH_EXPORT void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + +ONEMATH_EXPORT void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a); + +ONEMATH_EXPORT void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a); + +ONEMATH_EXPORT void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void symv(sycl::queue& queue, 
oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMATH_EXPORT void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMATH_EXPORT void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMATH_EXPORT void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy); -ONEMATH_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMATH_EXPORT void spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy); -ONEMATH_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); 
+ONEMATH_EXPORT void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + +ONEMATH_EXPORT void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + +ONEMATH_EXPORT void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); + +ONEMATH_EXPORT void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); + +ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx); + +ONEMATH_EXPORT void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx); + +ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, 
std::int64_t incx); + +ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx); + +ONEMATH_EXPORT void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx); + +ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx); + +ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx); + +ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx); + +ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose 
trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx); + +ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + 
+ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void dotc(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void dotc(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void dotu(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void dotu(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void 
iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMATH_EXPORT void sdsdot(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); -ONEMATH_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMATH_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, 
+ sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMATH_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMATH_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + +ONEMATH_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -ONEMATH_EXPORT void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, - float s); - -ONEMATH_EXPORT void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, - double s); - -ONEMATH_EXPORT void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, - float s); - -ONEMATH_EXPORT void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - double c, double 
s); - -ONEMATH_EXPORT void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - -ONEMATH_EXPORT void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - -ONEMATH_EXPORT void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); - -ONEMATH_EXPORT void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - -ONEMATH_EXPORT void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - -ONEMATH_EXPORT void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - -ONEMATH_EXPORT void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m); - -ONEMATH_EXPORT void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMATH_EXPORT void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void 
swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - -ONEMATH_EXPORT void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - 
sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t 
stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMATH_EXPORT void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose 
transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); +ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -ONEMATH_EXPORT void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); +ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMATH_EXPORT void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMATH_EXPORT void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMATH_EXPORT void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t 
bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMATH_EXPORT void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMATH_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -ONEMATH_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size); - -ONEMATH_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, 
sycl::buffer &ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size); - -ONEMATH_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - -ONEMATH_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - -ONEMATH_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 
1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - -ONEMATH_EXPORT void omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT void omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); - -ONEMATH_EXPORT void omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMATH_EXPORT void omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMATH_EXPORT void omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, - std::int64_t ldb, std::int64_t strideb); - -ONEMATH_EXPORT void omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, - std::int64_t ldb, std::int64_t strideb); - -ONEMATH_EXPORT void omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t strideb); +ONEMATH_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 
1>& y, + std::int64_t incy); -ONEMATH_EXPORT void omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t strideb); - -ONEMATH_EXPORT void imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, - std::int64_t lda, std::int64_t ldb); - -ONEMATH_EXPORT void imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, - std::int64_t lda, std::int64_t ldb); - -ONEMATH_EXPORT void imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb); - -ONEMATH_EXPORT void imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, - std::int64_t ldb); - -ONEMATH_EXPORT void omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, - std::int64_t ldc); - -ONEMATH_EXPORT void omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, - std::int64_t ldc); - -ONEMATH_EXPORT void omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMATH_EXPORT void omatadd(sycl::queue 
&queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); +ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -// USM APIs +ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, - const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const 
std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, oneapi::math::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, - 
oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk(sycl::queue 
&queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const 
std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue &queue, 
oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event herk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event herk(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2k(sycl::queue &queue, 
oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - 
const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t m, std::int64_t 
n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, - oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - 
oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, +ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + 
+ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + +ONEMATH_EXPORT void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + +ONEMATH_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + +ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + +ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, float c, float s); + +ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, + double s); + +ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, + float s); + +ONEMATH_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, + double s); + +ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); + +ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); + +ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, 
sycl::buffer& c, + sycl::buffer, 1>& s); + +ONEMATH_EXPORT void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); + +ONEMATH_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + +ONEMATH_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + +ONEMATH_EXPORT void rotmg(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); + +ONEMATH_EXPORT void rotmg(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + +ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void swap(sycl::queue& queue, std::int64_t n, + 
sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t 
stride_a, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); + +ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, 
sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); + +ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); + +ONEMATH_EXPORT void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); + +ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMATH_EXPORT void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + 
oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + +ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + +ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + +ONEMATH_EXPORT void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + +ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + 
std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, +ONEMATH_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float beta, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double beta, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - 
const std::vector &dependencies = {}); + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size); + +ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size); + +ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const 
std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, - const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, 
oneapi::math::side left_right, - std::int64_t m, std::int64_t n, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, - std::int64_t m, std::int64_t n, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, - std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, - std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, - std::int64_t *lda, 
const float **x, std::int64_t *incx, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, - double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, - std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, - std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, 
std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex 
alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2(sycl::queue &queue, oneapi::math::uplo 
upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event her2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, 
std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event symv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syr2(sycl::queue &queue, oneapi::math::uplo 
upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose 
trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT 
sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex 
*a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t 
incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamin(sycl::queue &queue, 
std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, - 
std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::math::transpose 
transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, - const 
std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, + std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, + std::int64_t ldb, std::int64_t strideb); 
-ONEMATH_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - float c, float s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, - double y1, double *param, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, - const std::vector &dependencies = 
{}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT 
sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t 
stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t strideb); -ONEMATH_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb); -ONEMATH_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); +ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb); -ONEMATH_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const 
sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo 
upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb); -ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void imatcopy(sycl::queue& queue, oneapi::math::transpose trans, 
std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, + std::int64_t ldb); + +ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, + std::int64_t ldc); + +ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); + +ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMATH_EXPORT void omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); +// USM APIs -ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector 
&dependencies = {}); +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const 
sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, 
oneapi::math::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + 
const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, float beta, float* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + 
std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, 
std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, 
std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, + oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, + std::int64_t* m, 
std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, + std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t 
incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch( + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, 
std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, - std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event gemv_batch( + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& 
dependencies = {}); + +ONEMATH_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, + std::int64_t m, std::int64_t n, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, + std::int64_t m, std::int64_t n, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* 
left_right, + std::int64_t* m, std::int64_t* n, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, 
oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = 
{}); + +ONEMATH_EXPORT sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, 
const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + std::int64_t lda, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, 
oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + 
const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, 
oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const 
std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + 
+ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, 
std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, 
std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + 
std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* result, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + float c, float s, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + double c, double 
s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + float* c, std::complex* s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotg(sycl::queue& queue, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float* param, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double* param, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, + float* param, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, + double* param, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, + std::int64_t incx, + const 
std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, 
std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies 
= {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, +ONEMATH_EXPORT 
sycl::event gemm_batch( + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch( + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + 
std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, std::int32_t* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, 
oneapi::math::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + 
std::int64_t m, std::int64_t n, double alpha, double* ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + std::complex* ab, std::int64_t lda, + std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, +ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - 
oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatadd_batch( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatadd_batch( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, 
std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - double *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, - std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, - std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue &queue, 
oneapi::math::transpose trans, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::math::transpose trans, +ONEMATH_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - std::complex 
*ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float beta, - const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, float* b, + std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, + std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, + std::int64_t strideb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, + std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
imatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double beta, - const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float beta, + const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa, + 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double beta, + const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -ONEMATH_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, float* alpha, const float** a, - std::int64_t* lda, float** b, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, double* alpha, const double** a, - std::int64_t* lda, double** b, std::int64_t* 
ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, std::complex* alpha, - const std::complex** a, std::int64_t* lda, - std::complex** b, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, std::complex* alpha, - const std::complex** a, std::int64_t* lda, - std::complex** b, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, float* alpha, float** ab, - std::int64_t* lda, std::int64_t* ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, double* alpha, double** ab, - std::int64_t* lda, std::int64_t* 
ldb, - std::int64_t group_count, std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, std::complex* alpha, - std::complex** ab, std::int64_t* lda, - std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, - std::int64_t* n, std::complex* alpha, - std::complex** ab, std::int64_t* lda, - std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies = {}); + std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, + const std::vector& dependencies = {}); diff --git a/include/oneapi/math/blas/detail/portblas/blas_ct.hxx b/include/oneapi/math/blas/detail/portblas/blas_ct.hxx index 8f505ef8c..2f3694c6e 100644 --- a/include/oneapi/math/blas/detail/portblas/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/portblas/blas_ct.hxx @@ -20,1894 +20,1894 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, 
trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } 
void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, 
sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { + 
oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t 
ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, 
stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t 
stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, 
std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, 
sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, 
- sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& 
x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void 
axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, + stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, 
incx, stridex, - y, incy, stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, + stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, + stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, + stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { 
oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, - result); + result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, + a, lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, 
- sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, + a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, 
std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, 
std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, - stridey, 
batch_size); + oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, 
std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, - stridey, batch_size); + oneapi::math::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, + lda, stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { 
oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, 
std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, + a, lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, + a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, - a); + a); } -void iamin(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + incx, beta, y, incy); } void 
spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, + ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, + ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, 
std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, + ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::math::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, + ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void 
swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, + a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, + a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { 
oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, 
std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void 
gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a, lda); + y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::syr2(selector.get_queue(), 
upper_lower, n, alpha, x, incx, - y, incy, a, lda); + y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex 
alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, 
std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, 
std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, 
std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, 
std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, 
alpha, x, incx, + a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { - oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a, lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + 
oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer& 
a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void 
copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, 
x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, 
std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) 
{ + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, 
incy); + oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, + lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, 
std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { - oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { + oneapi::math::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, + n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, - y, incy, a); + y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, 
sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, 
sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, 
- sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, + alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + 
sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + oneapi::math::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, + x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, 
std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, 
std::int64_t stride_b, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + oneapi::math::blas::portblas::MAJOR::omatcopy_batch( + selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, + ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, + ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, + ab, lda, ldb, stride, batch_size); } 
void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { - oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + oneapi::math::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, + ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, 
- alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, - b, ldb); + b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, 
std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double 
alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, 
ldc); + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, - a, lda, beta, b, ldb, c, ldc); + a, lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, 
const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& 
dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmv( selector.get_queue(), 
upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex 
beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done 
= oneapi::math::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; @@ -1915,9 +1915,9 @@ sycl::event syrk(backend_selector selector, uplo upper_lower, sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; @@ -1925,54 +1925,54 @@ sycl::event syrk(backend_selector selector, uplo upper_lower, sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t 
group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event 
syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -1980,10 +1980,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1992,9 +1992,9 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const 
std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2003,10 +2003,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2015,10 +2015,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2026,330 +2026,330 @@ sycl::event 
syrk_batch(backend_selector selector, uplo upper_ } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector 
selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + c, s, dependencies); return done; } sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, - c, s, dependencies); + c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, + y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, + y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, + y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, + y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* 
incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { 
auto done = oneapi::math::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector 
&dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, + incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, + incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float 
beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, 
transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, 
- std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ 
-2357,11 +2357,11 @@ sycl::event gemv_batch(backend_selector selector, transpose t } sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2370,11 +2370,11 @@ sycl::event gemv_batch(backend_selector selector, transpose t sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2383,58 +2383,58 @@ sycl::event gemv_batch(backend_selector selector, transpose t sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex 
*a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, 
+ std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); @@ -2442,10 +2442,10 @@ sycl::event gemv_batch(backend_selector selector, transpose * } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2453,10 +2453,10 @@ sycl::event dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2464,11 +2464,11 @@ sycl::event 
dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2476,55 +2476,55 @@ sycl::event dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector 
selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, 
group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2532,162 +2532,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, 
std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event 
iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, 
m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); 
return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose 
*transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t 
*k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2696,11 +2696,11 @@ sycl::event gemm_batch(backend_selector selector, transpose * sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2709,10 +2709,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2721,10 +2721,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2733,10 +2733,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, 
std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2745,10 +2745,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2757,10 +2757,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double 
beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2769,11 +2769,11 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,11 +2782,11 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t 
lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2794,180 +2794,180 @@ sycl::event gemm_batch(backend_selector selector, transpose t } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const 
std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, + incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, + incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done 
= oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t 
ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = - oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, 
- alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2976,10 +2976,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { 
auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2988,10 +2988,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3000,10 +3000,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, 
dependencies); @@ -3011,87 +3011,87 @@ sycl::event gemm_bias(backend_selector selector, transpose tr } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t 
incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + 
std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3100,9 +3100,9 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double 
alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3111,10 +3111,10 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3123,56 +3123,56 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto 
done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event 
trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3180,760 +3180,760 @@ sycl::event trsm_batch(backend_selector selector, side *left_ } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t 
incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const 
std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + 
std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, - a, lda, x, incx, beta, y, incy, dependencies); + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const 
std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, - upper_lower, m, n, alpha, a, lda, b, ldb, - beta, c, ldc, dependencies); + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + 
std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, + alpha, x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side 
left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t 
m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, + alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, double *x1, - double y1, double *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo 
upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, 
n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, 
incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const 
std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, + transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, + transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + 
std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, + transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, + transa, transb, n, k, alpha, a, lda, b, + ldb, beta, c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, 
double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) 
{ auto done = oneapi::math::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, 
std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, - alpha, x, incx, y, incy, a, dependencies); + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const 
double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, 
double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const 
float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, - n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies); + n, k, alpha, a, lda, b, ldb, beta, c, + ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const 
float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t 
lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3941,10 +3941,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpo } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3953,9 +3953,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpo sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, 
std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3964,9 +3964,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpo sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3974,18 +3974,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpo } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, 
transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3993,9 +3993,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4003,9 +4003,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4013,10 +4013,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - 
const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4025,10 +4025,10 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4037,11 +4037,11 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, 
std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4050,11 +4050,11 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4062,233 +4062,233 @@ sycl::event omatadd_batch(backend_selector selector, transpos } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, 
- std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t 
strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, 
dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } 
sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, + m, n, alpha, a, lda, beta, b, ldb, c, + ldc, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, + m, n, alpha, a, lda, beta, b, ldb, c, + ldc, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = 
oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, + m, n, alpha, a, lda, beta, b, ldb, c, + ldc, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + auto done = oneapi::math::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, + m, n, alpha, a, lda, beta, b, ldb, c, + ldc, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, 
ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, 
std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, 
std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::math::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx b/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx index cdfeae51b..42332ff59 100644 --- a/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx +++ b/include/oneapi/math/blas/detail/rocblas/blas_ct.hxx @@ -20,1898 +20,1902 @@ **************************************************************************/ void herk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc) { + int64_t k, float alpha, sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, 
alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, sycl::buffer, 1> &a, int64_t lda, - double beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t k, double alpha, sycl::buffer, 1>& a, int64_t lda, + double beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void scal(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx) { + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx) { + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - 
sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t 
n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void spr(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { - oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); + sycl::buffer& x, int64_t incx, sycl::buffer& a) { + oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } void spr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { - oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); + sycl::buffer& x, int64_t incx, sycl::buffer& a) { + oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, 
int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, double beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, 
batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, sycl::half beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, 
sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, stride_a, b, ldb, stride_b, beta, - c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, b, ldb, stride_b, beta, + c, ldc, stride_c, batch_size); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, 
float beta, - sycl::buffer &c, int64_t ldc) { + int64_t k, float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &c, int64_t ldc) { + int64_t k, double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, std::complex beta, sycl::buffer, 1> &c, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, std::complex beta, sycl::buffer, 1> &c, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, beta, c, ldc); + a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, float beta, sycl::buffer &c, int64_t ldc, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, float beta, sycl::buffer& c, int64_t ldc, 
int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, double beta, sycl::buffer &c, int64_t ldc, + int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, stride_a, beta, c, ldc, 
stride_c, - batch_size); + alpha, a, lda, stride_a, beta, c, ldc, stride_c, + batch_size); } void her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex 
alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void rot(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s) { oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s) { oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, float c, float s) { +void rot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, float c, float s) { oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, double c, double s) { +void rot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, double c, double s) { oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, 
double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, int64_t stridex, sycl::buffer &y, + sycl::buffer& x, int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, 
+ sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, - y, incy, stridey, batch_size); + y, incy, stridey, batch_size); } void axpby(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void axpby(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { 
oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, - incy); + incy); } void sdsdot(backend_selector selector, int64_t n, float sb, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, - result); + result); } void gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - 
sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, 
transpose trans, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, float beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + float alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, float beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { 
oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, double beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + double alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, double beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, int64_t stridex, std::complex beta, - 
sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, x, incx, stridex, beta, y, incy, stridey, - batch_size); + stridea, x, incx, stridex, beta, y, incy, + stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t 
stridec, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, - stridea, x, incx, stridex, c, ldc, stridec, - batch_size); + stridea, x, incx, stridex, c, ldc, stridec, + batch_size); } void her(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void her(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void hpr(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a) { - oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a) { + oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } void 
hpr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a) { - oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a) { + oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, + a); } -void iamin(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + 
oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy) { - oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + sycl::buffer& a, sycl::buffer& x, int64_t incx, float beta, + sycl::buffer& y, int64_t incy) { + oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { - oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, - beta, y, incy); + sycl::buffer& a, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy) { + oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, + incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, int8_t ao, sycl::buffer &b, - int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, int64_t ldc, - 
sycl::buffer &co) { + sycl::buffer& a, int64_t lda, int8_t ao, sycl::buffer& b, + int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, int8_t ao, sycl::buffer &b, - int64_t ldb, int8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, int8_t ao, sycl::buffer& b, + int64_t ldb, int8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, uint8_t ao, sycl::buffer &b, - int64_t ldb, int8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, uint8_t ao, sycl::buffer& b, + int64_t ldb, int8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, uint8_t ao, sycl::buffer &b, - int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { 
+ sycl::buffer& a, int64_t lda, uint8_t ao, sycl::buffer& b, + int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::math::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, - n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, - co); + n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co); } -void swap(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void swap(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void swap(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - 
lda); + lda); } void geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void nrm2(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), 
transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + 
oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, - lda, b, ldb, beta, c, ldc); + oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, 
transb, m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { - oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { + oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { - oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a, lda); + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { + oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a, lda); } void ger(backend_selector selector, int64_t m, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void ger(backend_selector selector, int64_t m, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, - lda); + lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer 
&a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { 
oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a) { - oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + 
sycl::buffer, 1>& a) { + oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a) { - oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a) { + oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, + int64_t kl, int64_t ku, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, + int64_t kl, int64_t ku, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& a, int64_t lda, + 
sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy); + lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { 
oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { + int64_t n, float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, 
sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void syr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, 
sycl::buffer& a, int64_t lda) { oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, - lda); + lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, - unit_diag, m, n, alpha, a, lda, b, ldb); + unit_diag, m, n, alpha, a, lda, b, 
ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& 
x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, x, incx); + a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - a, lda, x, incx); + a, lda, x, incx); } -void copy(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void copy(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t 
incy) { oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void copy(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy_batch(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } -void copy_batch(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void copy_batch(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, 
int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void copy_batch(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, - incy, stridey, batch_size); + incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, float alpha, sycl::buffer &a, - 
int64_t lda, sycl::buffer &b, int64_t ldb, float beta, - sycl::buffer &c, int64_t ldc) { + transpose transb, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, float beta, + sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, double beta, - sycl::buffer &c, int64_t ldc) { + transpose transb, int64_t n, int64_t k, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, double beta, + sycl::buffer& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { 
oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, - k, alpha, a, lda, b, ldb, beta, c, ldc); + k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { + double alpha, sycl::buffer& 
a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, - x, incx, beta, y, incy); + x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::math::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, - k, a, lda, x, incx); + k, a, lda, x, incx); } void 
spr2(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { - oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { + oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { - oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, - incy, a); + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { + oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, + y, incy, a); } -void iamax(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { 
oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& param) { oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& param) { oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose 
trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, 
int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, - trans, unit_diag, m, n, alpha, a, lda, stride_a, - b, ldb, stride_b, batch_size); + trans, unit_diag, m, n, alpha, a, lda, stride_a, + b, ldb, stride_b, batch_size); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, - a, lda, b, ldb, beta, c, ldc); + a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void 
rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { oneapi::math::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, - incx, beta, y, incy); + incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t 
lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size); + lda, stride_a, b, ldb, stride_b, batch_size); } void 
imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, - lda, ldb, stride, batch_size); + lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer 
&b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::math::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, - alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size); + alpha, a, lda, stride_a, beta, b, ldb, + stride_b, c, ldc, stride_c, batch_size); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { - oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { + oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { - oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { + oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, 
m, n, alpha, a, lda, + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { + oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { - oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, - ldb); + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { + oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, + b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { 
oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::math::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, - stridea, b, ldb, strideb); + stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, 
sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, - ldb); + ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { - oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, - lda, beta, b, ldb, c, ldc); + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { + oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, + a, lda, beta, b, ldb, c, ldc); } // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, const float *y, int64_t incy, float *a, - int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a, + int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2( + selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, 
uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, const double *y, int64_t incy, - double *a, int64_t lda, const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + double alpha, const double* x, int64_t incx, const double* y, int64_t incy, + double* a, int64_t lda, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2( + selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, int64_t n, float alpha, float *x, - int64_t incx, const std::vector &dependencies) { +sycl::event scal(backend_selector selector, int64_t n, float alpha, float* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } -sycl::event scal(backend_selector selector, int64_t n, double alpha, double *x, - int64_t incx, const std::vector &dependencies) { +sycl::event scal(backend_selector selector, int64_t n, double alpha, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, std::complex alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, std::complex alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto 
done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, float alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, double alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, - dependencies); + dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector 
&dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag 
unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = 
oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + std::complex alpha, const std::complex* a, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hpmv( + selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + std::complex alpha, const std::complex* a, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hpmv( + selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, float beta, - float *c, int64_t ldc, const std::vector &dependencies) { + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, double beta, - double *c, int64_t ldc, const std::vector &dependencies) { + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, double 
beta, + double* c, int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, float *alpha, const float **a, - int64_t *lda, float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, float* alpha, const float** a, + int64_t* lda, float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( 
selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, double *alpha, const double **a, - int64_t *lda, double *beta, double **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, double* alpha, const double** a, + int64_t* lda, double* beta, double** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector 
&dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -1919,10 +1923,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_ } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, int64_t n, int64_t k, float alpha, const float *a, - int64_t lda, int64_t stride_a, float beta, float *c, int64_t ldc, + transpose trans, int64_t n, int64_t k, float alpha, const float* a, + int64_t lda, int64_t stride_a, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1930,10 +1934,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, int64_t n, int64_t k, double alpha, const double *a, - int64_t lda, int64_t stride_a, double beta, double *c, int64_t ldc, + transpose trans, int64_t n, int64_t k, double alpha, const double* a, + int64_t lda, int64_t stride_a, double beta, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1942,10 +1946,10 @@ 
sycl::event syrk_batch(backend_selector selector, uplo upper_l sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex beta, std::complex *c, int64_t ldc, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1954,10 +1958,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex beta, std::complex *c, int64_t ldc, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1965,321 +1969,321 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l } sycl::event her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, 
std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::her2( + selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, lda, dependencies); + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::her2( + selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex 
beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); +sycl::event rot(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); +sycl::event rot(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, float *x, int64_t incx, - float *y, int64_t incy, float c, float s, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); +sycl::event rot(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, float c, float s, + const std::vector& dependencies) { + auto done = 
oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, double c, double s, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, - s, dependencies); +sycl::event rot(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, double c, double s, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, + c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { + const float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, double *y, int64_t incy, - const std::vector &dependencies) { + const double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } sycl::event 
axpy(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, - incy, dependencies); + incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, float *alpha, - const float **x, int64_t *incx, float **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, float* alpha, + const float** x, int64_t* incx, float** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, double *alpha, - const double **x, int64_t *incx, double **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, double* alpha, + const double** x, int64_t* incx, double** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, - std::complex *alpha, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, + std::complex* alpha, const std::complex** x, 
int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, - std::complex *alpha, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, + std::complex* alpha, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, + const float* x, int64_t incx, int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, + const double* x, int64_t incx, int64_t stridex, double* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); 
+ incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, - incx, stridex, y, incy, stridey, - batch_size, dependencies); + incx, stridex, y, incy, stridey, + batch_size, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, const float beta, float *y, int64_t incy, - const std::vector &dependencies) { + const float* x, int64_t incx, const float beta, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, const double beta, 
double *y, int64_t incy, - const std::vector &dependencies) { + const double* x, int64_t incx, const double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, - beta, y, incy, dependencies); + beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event 
gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, 
trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies) { - auto done = 
oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::gemv( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::gemv( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::gemv( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - 
std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::gemv( + selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stridea, - const float *x, int64_t incx, int64_t stridex, float beta, float *y, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stridea, + const float* x, int64_t incx, int64_t stridex, float beta, float* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2287,10 +2291,10 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stridea, - const double *x, int64_t incx, int64_t stridex, double beta, double *y, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stridea, + const double* x, int64_t incx, int64_t stridex, double beta, double* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = 
oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2298,11 +2302,11 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, + int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2310,55 +2314,55 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, + int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, float *alpha, const float **a, 
int64_t *lda, const float **x, - int64_t *incx, float *beta, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, float* alpha, const float** a, int64_t* lda, const float** x, + int64_t* incx, float* beta, float** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, double *alpha, const double **a, int64_t *lda, const double **x, - int64_t *incx, double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, double* alpha, const double** a, int64_t* lda, const double** x, + int64_t* incx, double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, 
int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); @@ -2366,9 +2370,9 @@ sycl::event gemv_batch(backend_selector selector, transpose *t } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const float *a, int64_t lda, int64_t stridea, const float *x, - int64_t incx, int64_t stridex, float *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { + int64_t n, const float* a, int64_t lda, int64_t stridea, const float* x, + int64_t incx, int64_t stridex, float* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2376,9 +2380,9 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event 
dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const double *a, int64_t lda, int64_t stridea, const double *x, - int64_t incx, int64_t stridex, double *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { + int64_t n, const double* a, int64_t lda, int64_t stridea, const double* x, + int64_t incx, int64_t stridex, double* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2386,10 +2390,10 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + int64_t n, const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2397,52 +2401,52 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + int64_t n, const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, 
int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const float **a, int64_t *lda, const float **x, int64_t *incx, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const float** a, int64_t* lda, const float** x, int64_t* incx, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const double **a, int64_t *lda, const double **x, int64_t *incx, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const double** a, int64_t* lda, const double** x, int64_t* incx, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { 
+sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2450,151 +2454,151 @@ sycl::event dgmm_batch(backend_selector selector, side *left_r } sycl::event her(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const std::complex *x, int64_t incx, std::complex *a, - int64_t lda, const std::vector &dependencies) { + float alpha, const std::complex* x, int64_t incx, std::complex* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const std::complex *x, int64_t incx, std::complex *a, - int64_t lda, const std::vector &dependencies) { + double alpha, 
const std::complex* x, int64_t incx, std::complex* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies) { + float alpha, const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies) { + double alpha, const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, dependencies); + x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamin(backend_selector selector, int64_t n, const double *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, 
result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamin(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const float** a, int64_t* lda, const float** b, int64_t* ldb, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, - const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, double* alpha, + const double** a, int64_t* lda, const double** b, int64_t* ldb, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** b, int64_t* ldb, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, + std::complex* alpha, const std::complex** 
a, int64_t* lda, + const std::complex** b, int64_t* ldb, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, sycl::half *alpha, - const sycl::half **a, int64_t *lda, const sycl::half **b, int64_t *ldb, - sycl::half *beta, sycl::half **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, sycl::half* alpha, + const sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb, + sycl::half* beta, sycl::half** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const sycl::half **a, int64_t *lda, const sycl::half **b, int64_t *ldb, - float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb, + float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = 
oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const std::int8_t **a, int64_t *lda, const std::int8_t **b, int64_t *ldb, - float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb, + float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const std::int8_t **a, int64_t *lda, const std::int8_t **b, int64_t *ldb, - float *beta, std::int32_t **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb, + float* beta, std::int32_t** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2603,9 +2607,9 @@ sycl::event gemm_batch(backend_selector selector, 
transpose *t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *b, int64_t ldb, - int64_t stride_b, float beta, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const float* a, int64_t lda, int64_t stride_a, const float* b, int64_t ldb, + int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2614,9 +2618,9 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *b, int64_t ldb, - int64_t stride_b, double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const double* a, int64_t lda, int64_t stride_a, const double* b, int64_t ldb, + int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2625,11 +2629,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, + 
const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2638,11 +2642,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2651,10 +2655,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, sycl::half alpha, - const sycl::half *a, int64_t lda, int64_t stride_a, const sycl::half *b, - int64_t ldb, int64_t stride_b, sycl::half beta, sycl::half *c, int64_t ldc, + const sycl::half* a, int64_t lda, int64_t stride_a, const sycl::half* b, + int64_t ldb, int64_t stride_b, sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = 
oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2663,10 +2667,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const sycl::half *a, int64_t lda, int64_t stride_a, const sycl::half *b, - int64_t ldb, int64_t stride_b, float beta, float *c, int64_t ldc, + const sycl::half* a, int64_t lda, int64_t stride_a, const sycl::half* b, + int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2675,10 +2679,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, int64_t stride_a, const std::int8_t *b, - int64_t ldb, int64_t stride_b, float beta, float *c, int64_t ldc, + const std::int8_t* a, int64_t lda, int64_t stride_a, const std::int8_t* b, + int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2687,10 +2691,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose 
transb, int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, int64_t stride_a, const std::int8_t *b, - int64_t ldb, int64_t stride_b, float beta, std::int32_t *c, int64_t ldc, + const std::int8_t* a, int64_t lda, int64_t stride_a, const std::int8_t* b, + int64_t ldb, int64_t stride_b, float beta, std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2698,177 +2702,177 @@ sycl::event gemm_batch(backend_selector selector, transpose tr } sycl::event spmv(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *a, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + float alpha, const float* a, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::spmv( + selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *a, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, - a, x, incx, beta, y, incy, dependencies); + double alpha, const double* a, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::spmv( + selector.get_queue(), upper_lower, n, 
alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, float *x, int64_t incx, - float *y, int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t 
incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event nrm2(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, int64_t n, const float* x, + int64_t incx, float* result, 
const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event nrm2(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, int64_t n, const double* x, + int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const float* a, int64_t lda, + const float* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, double alpha, const double* a, int64_t lda, + const double* b, int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose 
transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, const sycl::half *a, - int64_t lda, const sycl::half *b, int64_t ldb, sycl::half beta, sycl::half *c, - int64_t ldc, const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, sycl::half alpha, const sycl::half* a, + int64_t lda, const sycl::half* b, int64_t ldb, sycl::half beta, sycl::half* c, + int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, 
dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const sycl::half *a, int64_t lda, - const sycl::half *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const sycl::half* a, int64_t lda, + const sycl::half* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const bfloat16 *a, int64_t lda, - const bfloat16 *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const bfloat16* a, int64_t lda, + const bfloat16* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, int64_t lda, std::int8_t ao, + const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2877,10 +2881,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, int64_t lda, std::int8_t ao, + const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2889,10 +2893,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::uint8_t* a, int64_t lda, std::uint8_t ao, + const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2901,10 +2905,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event gemm_bias(backend_selector 
selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::uint8_t* a, int64_t lda, std::uint8_t ao, + const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2912,83 +2916,83 @@ sycl::event gemm_bias(backend_selector selector, transpose tra } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const std::complex *a, int64_t lda, - float beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, float alpha, const std::complex* a, int64_t lda, + float beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const std::complex *a, int64_t lda, - double beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, double alpha, const std::complex* a, int64_t lda, + double beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, int64_t m, int64_t n, float alpha, - 
const float *x, int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies) { + const float* x, int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, int64_t m, int64_t n, double alpha, - const double *x, int64_t incx, const double *y, int64_t incy, double *a, - int64_t lda, const std::vector &dependencies) { + const double* x, int64_t incx, const double* y, int64_t incy, double* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, - y, incy, a, lda, dependencies); + y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb, const std::vector &dependencies) { + transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), 
left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, int64_t stride_a, float *b, + float alpha, const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, 
b, ldb, stride_b, batch_size, dependencies); @@ -2997,9 +3001,9 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, int64_t stride_a, double *b, + double alpha, const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3008,9 +3012,9 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3019,53 +3023,53 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* a, 
int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event 
trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3073,747 +3077,747 @@ sycl::event trsm_batch(backend_selector selector, side *left_r } sycl::event dotu(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t 
incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotu(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, 
lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, float alpha, const float *a, int64_t lda, const float *x, - int64_t incx, float beta, float *y, int64_t incy, - const 
std::vector &dependencies) { + int64_t kl, int64_t ku, float alpha, const float* a, int64_t lda, const float* x, + int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { auto done = - oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, double alpha, const double *a, int64_t lda, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, double alpha, const double* a, int64_t lda, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = - oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = - oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, 
kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = - oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + oneapi::math::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, + a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const float *a, int64_t lda, float *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const double *a, int64_t lda, double *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t 
n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - 
oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto 
done = oneapi::math::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, + upper_lower, m, n, alpha, a, lda, b, ldb, + beta, c, ldc, dependencies); return done; } sycl::event dotc(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event dotc(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, float *a, int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, float* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, - x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, double *a, int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, double* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, 
- x, incx, a, lda, dependencies); + x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb, const std::vector &dependencies) { + transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } sycl::event trmm(backend_selector 
selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, - upper_lower, trans, unit_diag, m, n, alpha, - a, lda, b, ldb, dependencies); + upper_lower, trans, unit_diag, m, n, alpha, + a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, double *x1, - double y1, double *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, - param, dependencies); + param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector 
selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, - unit_diag, n, a, x, incx, dependencies); + unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector 
selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, int64_t n, const float* x, + int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *y, 
int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, int64_t n, const double* x, + int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } sycl::event copy(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, - dependencies); + dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, const float **x, - int64_t *incx, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, const float** x, + int64_t* incx, float** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, 
int64_t* n, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t stridex, float* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), 
n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t n, const double *x, - int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const 
std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + transpose transb, int64_t n, int64_t k, float alpha, const float* a, int64_t lda, + const float* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, double alpha, const double *a, - int64_t lda, const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + transpose transb, int64_t n, 
int64_t k, double alpha, const double* a, + int64_t lda, const double* b, int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, - transb, n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies); + transb, n, k, alpha, a, lda, b, ldb, beta, + c, ldc, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *x, int64_t 
incx, - float beta, float *y, int64_t incy, const std::vector &dependencies) { + int64_t k, float alpha, const float* a, int64_t lda, const float* x, int64_t incx, + float beta, float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event sbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *x, - int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, double alpha, const double* a, int64_t lda, const double* x, + int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, - lda, x, incx, beta, y, incy, dependencies); + lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event asum(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event asum(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(backend_selector 
selector, int64_t n, const float* x, + int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event asum(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, int64_t n, const double* x, + int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const float *a, int64_t lda, float *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const double *a, int64_t lda, double *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, 
int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, const float *y, int64_t incy, float *a, - const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, const double *y, int64_t incy, - double *a, const std::vector &dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, - x, incx, y, incy, a, dependencies); + double alpha, const double* x, int64_t incx, const double* y, int64_t incy, + double* a, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, + alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event 
iamax(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event iamax(backend_selector selector, int64_t n, const double *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } sycl::event iamax(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, - dependencies); + dependencies); return done; } -sycl::event rotm(backend_selector selector, int64_t n, float *x, int64_t incx, - float *y, int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, float* param, + const std::vector& dependencies) { auto done = 
oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotm(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, - param, dependencies); + param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* 
c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event sdsdot(backend_selector selector, int64_t n, float sb, const float *x, - int64_t incx, const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(backend_selector selector, int64_t n, float sb, const float* x, + int64_t incx, const float* y, int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, - incy, result, dependencies); + incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = - oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, - alpha, a, lda, b, ldb, beta, c, ldc, dependencies); + int64_t n, int64_t k, 
std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, + n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const float *x, - int64_t incx, const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, int64_t n, const float* x, + int64_t incx, const float* y, int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const double *x, - int64_t incx, const double *y, int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, int64_t n, const double* x, + int64_t incx, const double* y, int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const float *x, - int64_t incx, const float *y, int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, int64_t n, const float* x, + int64_t incx, const float* y, int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, - result, dependencies); + result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, 
float beta, - float *y, int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3821,10 +3825,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpos } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3833,9 +3837,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpos sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3844,9 +3848,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpos sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3854,18 +3858,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpos } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float 
*ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3873,9 +3877,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3883,9 +3887,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto 
done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3893,10 +3897,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3905,10 +3909,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3917,11 +3921,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, 
transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3930,11 +3934,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3942,150 +3946,150 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - 
std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, - alpha, a, lda, b, ldb, dependencies); + 
alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex 
*a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose 
trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, dependencies); + alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, - n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies); + n, alpha, a, lda, beta, b, ldb, c, ldc, + dependencies); return done; } @@ -4115,8 +4119,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpos std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4127,8 +4130,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpos std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, 
std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::math::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4138,22 +4140,20 @@ sycl::event omatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( + selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, + dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + std::int64_t* groupsize, const std::vector& dependencies) { + auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( + selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, + dependencies); return done; } @@ -4162,9 +4162,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpos std::complex** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { 
- auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( + selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, + dependencies); return done; } @@ -4173,8 +4173,8 @@ sycl::event imatcopy_batch(backend_selector selector, transpos std::complex** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, const std::vector& dependencies) { - auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, - alpha, ab, lda, ldb, group_count, - groupsize, dependencies); + auto done = oneapi::math::blas::rocblas::MAJOR::imatcopy_batch( + selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, + dependencies); return done; } diff --git a/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx b/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx index 70aabaaf9..f6c3eeee5 100644 --- a/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx +++ b/include/oneapi/math/blas/detail/rocblas/onemath_blas_rocblas.hxx @@ -21,2102 +21,2102 @@ // Buffer APIs -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void 
asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, 
int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer 
&y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, 
int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - 
sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, 
sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, float c, float s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, float c, float s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, double c, double s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + 
sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &param); +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &param); +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer &param); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer &param); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n,
double alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer, 1>& x, int64_t incx); -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, 
int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, float beta, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, double beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size); +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, double beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t 
stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size); +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t 
stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, 
- sycl::buffer, 1> &a, int64_t lda); - -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda); - -void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void gerc(sycl::queue& queue, int64_t m, 
int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + 
sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + 
sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, 
sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a); -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, float beta, + sycl::buffer& y, int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda); -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, 
sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, 
+void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, 
int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t 
m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - sycl::half alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer, 1> &a, int64_t lda, double beta, - sycl::buffer, 1> &c, int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose 
trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - double beta, sycl::buffer &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, float beta, sycl::buffer &c, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 
1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + sycl::half alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + 
sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer, 1>& a, int64_t lda, double beta, + sycl::buffer, 1>& c, int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + double beta, sycl::buffer& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, 
int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, float beta, sycl::buffer& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, double beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, double beta, sycl::buffer& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, 
int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + 
double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, 
std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, 
diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, double beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, - int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 
1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - 
int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, 
int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex 
alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); - -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); - -void 
omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + 
sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, 
sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void 
omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, 
int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void 
imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); // USM APIs -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event 
axpy(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& 
queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + 
const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - const float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + const float beta, float* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - const double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + const double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex 
beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, 
+ int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t stridex, - float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t stridex, + float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); 
+sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, float *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, float* result, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, const double *y, - int64_t incy, double *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const double* x, int64_t incx, const double* y, + int64_t incy, double* result, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const 
std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = 
{}); -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies = {}); 
-sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float c, float s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float c, float s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double c, double s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double c, double s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event 
rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float *param, const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float* param, const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double *param, const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double* param, const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, double* d1, 
double* d2, double* x1, double y1, double* param, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, float* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, double* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, 
int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies = 
{}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, 
int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stridea, const float *x, int64_t incx, - int64_t stridex, float beta, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stridea, const float* x, int64_t incx, + int64_t stridex, float beta, float* y, int64_t incy, int64_t stridey, + int64_t batch_size, 
const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stridea, const double *x, int64_t incx, - int64_t stridex, double beta, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stridea, const double* x, int64_t incx, + int64_t stridex, double beta, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, int64_t stridex, - std::complex beta, std::complex *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, int64_t stridex, + std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, 
float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stridea, const float *x, int64_t incx, int64_t stridex, - float *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stridea, const double *x, int64_t incx, int64_t stridex, - double *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const 
std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const float *x, int64_t incx, - const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, 
std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector 
&dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event 
sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *a, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, double *y, int64_t incy, - 
const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - 
int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - 
const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); 
- -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, sycl::half beta, sycl::half *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue 
&queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, float beta, std::complex *c, - int64_t ldc, const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, double beta, std::complex *c, - int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - 
std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, 
- int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const 
std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stridea, const float* x, int64_t incx, int64_t stridex, + float* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stridea, const double* x, int64_t incx, int64_t stridex, + double* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event 
dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, const float* x, int64_t incx, + const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + 
const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, 
std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + const float* x, int64_t incx, float beta, float* y, int64_t incy, 
+ const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* a, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + int64_t lda, const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* a, + int64_t lda, const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event 
syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose 
trans, diag unit_diag, int64_t n, + const float* a, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, 
int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, + float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, 
transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, sycl::half beta, sycl::half* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const std::complex* a, 
int64_t lda, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, float beta, std::complex* c, + int64_t ldc, const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, double beta, std::complex* c, + int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, 
transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float 
beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, 
int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const 
std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies = {}); +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, 
int64_t ldb, const std::vector &dependencies = {}); +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t 
stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, - int64_t *lda, float **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, - int64_t *lda, double **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector 
&dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - 
const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *b, int64_t ldb, int64_t stride_b, float beta, float *c, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* 
lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t 
group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t 
ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *b, int64_t ldb, int64_t stride_b, double beta, double *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const 
std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, + int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* 
a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, - int64_t lda, std::int8_t ao, const std::uint8_t *b, int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, 
int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, - int64_t lda, std::int8_t ao, const std::int8_t *b, int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t *a, - int64_t lda, std::uint8_t ao, const std::int8_t *b, int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t *a, - int64_t lda, std::uint8_t ao, const std::uint8_t *b, int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = 
{}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event 
gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, 
int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - 
int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event 
omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex 
alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); + const 
std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event 
imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, diff --git 
a/include/oneapi/math/detail/exceptions.hpp b/include/oneapi/math/detail/exceptions.hpp index 8a82db01a..eb8cc0b12 100644 --- a/include/oneapi/math/detail/exceptions.hpp +++ b/include/oneapi/math/detail/exceptions.hpp @@ -31,14 +31,14 @@ namespace math { class backend_not_found : public oneapi::math::exception { public: - backend_not_found(const std::string &info = "") + backend_not_found(const std::string& info = "") : oneapi::math::exception( "", "", ((info.length() != 0) ? info : "Couldn't load selected backend")) {} }; class function_not_found : public oneapi::math::exception { public: - function_not_found(const std::string &info = "") + function_not_found(const std::string& info = "") : oneapi::math::exception( "", "", ((info.length() != 0) ? info : "Couldn't load functions from selected backend")) { @@ -48,8 +48,8 @@ class function_not_found : public oneapi::math::exception { class library_not_found : public oneapi::math::exception { public: library_not_found(const std::string& message) : exception(message) {} - library_not_found(const std::string &domain, const std::string &function, - const std::string &info = "") + library_not_found(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::math::exception( domain, function, "library not found" + ((info.length() != 0) ? (": " + info) : "")) {} @@ -57,10 +57,11 @@ class library_not_found : public oneapi::math::exception { class specification_mismatch : public oneapi::math::exception { public: - specification_mismatch(const std::string &info = "") + specification_mismatch(const std::string& info = "") : oneapi::math::exception( "", "", - ((info.length() != 0) ? info : "Loaded oneMath specification version mismatch")) {} + ((info.length() != 0) ? 
info : "Loaded oneMath specification version mismatch")) { + } }; } // namespace math diff --git a/include/oneapi/math/detail/get_device_id.hpp b/include/oneapi/math/detail/get_device_id.hpp index c81f96c13..32d493059 100644 --- a/include/oneapi/math/detail/get_device_id.hpp +++ b/include/oneapi/math/detail/get_device_id.hpp @@ -40,7 +40,7 @@ namespace oneapi { namespace math { -inline oneapi::math::device get_device_id(sycl::queue &queue) { +inline oneapi::math::device get_device_id(sycl::queue& queue) { oneapi::math::device device_id; if (queue.get_device().is_cpu()) device_id = device::x86cpu; diff --git a/include/oneapi/math/dft/backward.hpp b/include/oneapi/math/dft/backward.hpp index 50c9f4234..6d7f7f122 100644 --- a/include/oneapi/math/dft/backward.hpp +++ b/include/oneapi/math/dft/backward.hpp @@ -33,7 +33,7 @@ namespace oneapi::math::dft { //In-place transform template -void compute_backward(descriptor_type &desc, sycl::buffer &inout) { +void compute_backward(descriptor_type& desc, sycl::buffer& inout) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -46,8 +46,8 @@ void compute_backward(descriptor_type &desc, sycl::buffer &inout) //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -void compute_backward(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) { +void compute_backward(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -61,8 +61,8 @@ void compute_backward(descriptor_type &desc, sycl::buffer &inout_r //Out-of-place transform template -void compute_backward(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) { +void compute_backward(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); 
static_assert(detail::valid_compute_arg::value, @@ -79,9 +79,9 @@ void compute_backward(descriptor_type &desc, sycl::buffer &in, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -void compute_backward(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, sycl::buffer &out_re, - sycl::buffer &out_im) { +void compute_backward(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, sycl::buffer& out_re, + sycl::buffer& out_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -104,34 +104,32 @@ void compute_backward(descriptor_type &desc, sycl::buffer &in_re, //In-place transform template -sycl::event compute_backward(descriptor_type &desc, data_type *inout, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, data_type* inout, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using fwd_type = typename detail::descriptor_info::forward_type; - return get_commit(desc)->backward_ip_cc(desc, reinterpret_cast(inout), - dependencies); + return get_commit(desc)->backward_ip_cc(desc, reinterpret_cast(inout), dependencies); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -sycl::event compute_backward(descriptor_type &desc, data_type *inout_re, data_type *inout_im, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, data_type* inout_re, data_type* inout_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->backward_ip_rr(desc, reinterpret_cast(inout_re), - reinterpret_cast(inout_im), - dependencies); + return 
get_commit(desc)->backward_ip_rr(desc, reinterpret_cast(inout_re), + reinterpret_cast(inout_im), dependencies); } //Out-of-place transform template -sycl::event compute_backward(descriptor_type &desc, input_type *in, output_type *out, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, input_type* in, output_type* out, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -139,25 +137,25 @@ sycl::event compute_backward(descriptor_type &desc, input_type *in, output_type using fwd_type = typename detail::descriptor_info::forward_type; using bwd_type = typename detail::descriptor_info::backward_type; - return get_commit(desc)->backward_op_cc(desc, reinterpret_cast(in), - reinterpret_cast(out), dependencies); + return get_commit(desc)->backward_op_cc(desc, reinterpret_cast(in), + reinterpret_cast(out), dependencies); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -sycl::event compute_backward(descriptor_type &desc, input_type *in_re, input_type *in_im, - output_type *out_re, output_type *out_im, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, input_type* in_re, input_type* in_im, + output_type* out_re, output_type* out_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->backward_op_rr(desc, reinterpret_cast(in_re), - reinterpret_cast(in_im), - reinterpret_cast(out_re), - reinterpret_cast(out_im), dependencies); + return get_commit(desc)->backward_op_rr(desc, reinterpret_cast(in_re), + reinterpret_cast(in_im), + reinterpret_cast(out_re), + 
reinterpret_cast(out_im), dependencies); } } // namespace oneapi::math::dft diff --git a/include/oneapi/math/dft/detail/commit_impl.hpp b/include/oneapi/math/dft/detail/commit_impl.hpp index d3f7d7d8d..6aaa92580 100644 --- a/include/oneapi/math/dft/detail/commit_impl.hpp +++ b/include/oneapi/math/dft/detail/commit_impl.hpp @@ -54,18 +54,18 @@ class commit_impl { public: commit_impl(sycl::queue queue, math::backend backend, - const dft::detail::dft_values &config_values) + const dft::detail::dft_values& config_values) : queue_(queue), backend_(backend), external_workspace_helper_(config_values.workspace_placement == dft::detail::config_value::WORKSPACE_EXTERNAL) {} // rule of three - commit_impl(const commit_impl &other) = delete; - commit_impl &operator=(const commit_impl &other) = delete; + commit_impl(const commit_impl& other) = delete; + commit_impl& operator=(const commit_impl& other) = delete; virtual ~commit_impl() = default; - sycl::queue &get_queue() noexcept { + sycl::queue& get_queue() noexcept { return queue_; } @@ -73,9 +73,9 @@ class commit_impl { return backend_; } - virtual void *get_handle() noexcept = 0; + virtual void* get_handle() noexcept = 0; - virtual void commit(const dft_values &) = 0; + virtual void commit(const dft_values&) = 0; inline std::int64_t get_workspace_external_bytes() { return external_workspace_helper_.get_rqd_workspace_bytes(*this); @@ -87,54 +87,54 @@ class commit_impl { // When not overridden, external workspace support is faked: an external workspace can be set, // and errors will be generated according to the specificiation, // but the required workspace size will always be zero, and any given workspace will not actually be used. 
- virtual void set_workspace(scalar_type *usm_workspace) { + virtual void set_workspace(scalar_type* usm_workspace) { external_workspace_helper_.set_workspace_throw(*this, usm_workspace); } - virtual void set_workspace(sycl::buffer &buffer_workspace) { + virtual void set_workspace(sycl::buffer& buffer_workspace) { external_workspace_helper_.set_workspace_throw(*this, buffer_workspace); } - virtual void forward_ip_cc(descriptor_type &desc, sycl::buffer &inout) = 0; - virtual void forward_ip_rr(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) = 0; - virtual void forward_op_cc(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) = 0; - virtual void forward_op_rr(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, - sycl::buffer &out_re, - sycl::buffer &out_im) = 0; - - virtual sycl::event forward_ip_cc(descriptor_type &desc, fwd_type *inout, - const std::vector &dependencies) = 0; - virtual sycl::event forward_ip_rr(descriptor_type &desc, scalar_type *inout_re, - scalar_type *inout_im, - const std::vector &dependencies) = 0; - virtual sycl::event forward_op_cc(descriptor_type &desc, fwd_type *in, bwd_type *out, - const std::vector &dependencies) = 0; - virtual sycl::event forward_op_rr(descriptor_type &desc, scalar_type *in_re, scalar_type *in_im, - scalar_type *out_re, scalar_type *out_im, - const std::vector &dependencies) = 0; - - virtual void backward_ip_cc(descriptor_type &desc, sycl::buffer &inout) = 0; - virtual void backward_ip_rr(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) = 0; - virtual void backward_op_cc(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) = 0; - virtual void backward_op_rr(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, - sycl::buffer &out_re, - sycl::buffer &out_im) = 0; - - virtual sycl::event backward_ip_cc(descriptor_type &desc, fwd_type *inout, - const std::vector &dependencies) = 0; - virtual sycl::event 
backward_ip_rr(descriptor_type &desc, scalar_type *inout_re, - scalar_type *inout_im, - const std::vector &dependencies) = 0; - virtual sycl::event backward_op_cc(descriptor_type &desc, bwd_type *in, fwd_type *out, - const std::vector &dependencies) = 0; - virtual sycl::event backward_op_rr(descriptor_type &desc, scalar_type *in_re, - scalar_type *in_im, scalar_type *out_re, scalar_type *out_im, - const std::vector &dependencies) = 0; + virtual void forward_ip_cc(descriptor_type& desc, sycl::buffer& inout) = 0; + virtual void forward_ip_rr(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) = 0; + virtual void forward_op_cc(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) = 0; + virtual void forward_op_rr(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, + sycl::buffer& out_re, + sycl::buffer& out_im) = 0; + + virtual sycl::event forward_ip_cc(descriptor_type& desc, fwd_type* inout, + const std::vector& dependencies) = 0; + virtual sycl::event forward_ip_rr(descriptor_type& desc, scalar_type* inout_re, + scalar_type* inout_im, + const std::vector& dependencies) = 0; + virtual sycl::event forward_op_cc(descriptor_type& desc, fwd_type* in, bwd_type* out, + const std::vector& dependencies) = 0; + virtual sycl::event forward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_type* in_im, + scalar_type* out_re, scalar_type* out_im, + const std::vector& dependencies) = 0; + + virtual void backward_ip_cc(descriptor_type& desc, sycl::buffer& inout) = 0; + virtual void backward_ip_rr(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) = 0; + virtual void backward_op_cc(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) = 0; + virtual void backward_op_rr(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, + sycl::buffer& out_re, + sycl::buffer& out_im) = 0; + + virtual sycl::event backward_ip_cc(descriptor_type& desc, fwd_type* inout, + const std::vector& dependencies) = 0; 
+ virtual sycl::event backward_ip_rr(descriptor_type& desc, scalar_type* inout_re, + scalar_type* inout_im, + const std::vector& dependencies) = 0; + virtual sycl::event backward_op_cc(descriptor_type& desc, bwd_type* in, fwd_type* out, + const std::vector& dependencies) = 0; + virtual sycl::event backward_op_rr(descriptor_type& desc, scalar_type* in_re, + scalar_type* in_im, scalar_type* out_re, scalar_type* out_im, + const std::vector& dependencies) = 0; /** For compute calls, throw errors for the external workspace as required. * @tparam ArgTs The non-descriptor arg(s) for the compute call. First one is used to check @@ -142,7 +142,7 @@ class commit_impl { * @param function_name The function name to user in generated exceptions. */ template - void compute_call_throw(const char *function_name) { + void compute_call_throw(const char* function_name) { external_workspace_helper_.template compute_call_throw(function_name); } @@ -151,14 +151,14 @@ class commit_impl { * @param function_name The function name to user in generated exceptions. * @param cgh The command group handler to associate the accessor with. */ - void add_buffer_workspace_dependency_if_rqd(const char *function_name, sycl::handler &cgh) { + void add_buffer_workspace_dependency_if_rqd(const char* function_name, sycl::handler& cgh) { external_workspace_helper_.add_buffer_dependency_if_rqd(function_name, cgh); } /** If WORKSPACE_EXTERNAL is set, depend on the last USM workspace event added via set_last_usm_workspace_event. * @param cgh The command group handler to associate the accessor with. */ - void depend_on_last_usm_workspace_event_if_rqd(sycl::handler &cgh) { + void depend_on_last_usm_workspace_event_if_rqd(sycl::handler& cgh) { external_workspace_helper_.depend_on_last_usm_workspace_event_if_rqd(cgh); } @@ -166,7 +166,7 @@ class commit_impl { * subsequent calls to depend_on_last_usm_workspace_event. * @param sycl_event The last usage of the USM workspace. 
*/ - void set_last_usm_workspace_event_if_rqd(sycl::event &sycl_event) { + void set_last_usm_workspace_event_if_rqd(sycl::event& sycl_event) { external_workspace_helper_.set_last_usm_workspace_event_if_rqd(sycl_event); } diff --git a/include/oneapi/math/dft/detail/dft_ct.hxx b/include/oneapi/math/dft/detail/dft_ct.hxx index bf479bde1..e03298c5e 100644 --- a/include/oneapi/math/dft/detail/dft_ct.hxx +++ b/include/oneapi/math/dft/detail/dft_ct.hxx @@ -20,8 +20,8 @@ // Commit template -ONEMATH_EXPORT dft::detail::commit_impl *create_commit( - const dft::detail::descriptor &desc, sycl::queue &sycl_queue); +ONEMATH_EXPORT dft::detail::commit_impl* create_commit( + const dft::detail::descriptor& desc, sycl::queue& sycl_queue); // BUFFER version @@ -34,105 +34,107 @@ using bwd = typename detail::descriptor_info::backward_type; //In-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout); +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im); +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im); //Out-of-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out); +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im); +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, 
+ sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im); //USM version //In-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies); //Out-of-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies); // BUFFER version //In-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout); +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im); +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + 
sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im); //Out-of-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out); +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im); +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im); //USM version //In-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, + scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies); //Out-of-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies); +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies); 
+ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies); diff --git a/include/oneapi/math/dft/detail/dft_loader.hpp b/include/oneapi/math/dft/detail/dft_loader.hpp index 41def11db..34fb2da56 100644 --- a/include/oneapi/math/dft/detail/dft_loader.hpp +++ b/include/oneapi/math/dft/detail/dft_loader.hpp @@ -42,7 +42,7 @@ class descriptor; template ONEMATH_EXPORT commit_impl* create_commit(const descriptor& desc, - sycl::queue& queue); + sycl::queue& queue); } // namespace detail } // namespace dft diff --git a/include/oneapi/math/dft/detail/external_workspace_helper.hpp b/include/oneapi/math/dft/detail/external_workspace_helper.hpp index 24bdf93c5..31d1d7ffc 100644 --- a/include/oneapi/math/dft/detail/external_workspace_helper.hpp +++ b/include/oneapi/math/dft/detail/external_workspace_helper.hpp @@ -91,7 +91,7 @@ class external_workspace_helper { void set_workspace_throw(commit_impl_t& committed_desc, scalar_t* usm_workspace) { if (get_rqd_workspace_bytes(committed_desc) > 0 && usm_workspace == nullptr) { throw math::invalid_argument("DFT", "set_workspace", - "Backend expected a non-null workspace pointer."); + "Backend expected a non-null workspace pointer."); } m_ext_workspace_rqd = true; m_workspace_type = ext_workspace_type::usm; @@ -110,7 +110,7 @@ class external_workspace_helper { } if (buffer_workspace.is_sub_buffer()) { throw math::invalid_argument("DFT", "set_workspace", - "Cannot use sub-buffers for workspace"); + "Cannot use sub-buffers for workspace"); return; } m_ext_workspace_rqd = true; diff --git a/include/oneapi/math/dft/detail/types_impl.hpp b/include/oneapi/math/dft/detail/types_impl.hpp index da17c762c..233a8bd0b 100644 --- a/include/oneapi/math/dft/detail/types_impl.hpp +++ b/include/oneapi/math/dft/detail/types_impl.hpp @@ -113,7 +113,7 @@ using valid_compute_arg = typename std::bool_constant< template constexpr bool 
valid_ip_realreal_impl = - is_complex_dft&& std::is_same_v, data_t>; + is_complex_dft && std::is_same_v, data_t>; // compute the range of a reinterpreted buffer template diff --git a/include/oneapi/math/dft/forward.hpp b/include/oneapi/math/dft/forward.hpp index 67f106923..297582423 100644 --- a/include/oneapi/math/dft/forward.hpp +++ b/include/oneapi/math/dft/forward.hpp @@ -34,7 +34,7 @@ namespace oneapi::math::dft { //In-place transform template -void compute_forward(descriptor_type &desc, sycl::buffer &inout) { +void compute_forward(descriptor_type& desc, sycl::buffer& inout) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -47,8 +47,8 @@ void compute_forward(descriptor_type &desc, sycl::buffer &inout) { //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -void compute_forward(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) { +void compute_forward(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -62,8 +62,8 @@ void compute_forward(descriptor_type &desc, sycl::buffer &inout_re //Out-of-place transform template -void compute_forward(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) { +void compute_forward(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -80,9 +80,9 @@ void compute_forward(descriptor_type &desc, sycl::buffer &in, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -void compute_forward(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, sycl::buffer &out_re, - sycl::buffer &out_im) { +void compute_forward(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, sycl::buffer& 
out_re, + sycl::buffer& out_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -105,56 +105,56 @@ void compute_forward(descriptor_type &desc, sycl::buffer &in_re, //In-place transform template -sycl::event compute_forward(descriptor_type &desc, data_type *inout, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, data_type* inout, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using fwd_type = typename detail::descriptor_info::forward_type; - return get_commit(desc)->forward_ip_cc(desc, reinterpret_cast(inout), dependencies); + return get_commit(desc)->forward_ip_cc(desc, reinterpret_cast(inout), dependencies); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -sycl::event compute_forward(descriptor_type &desc, data_type *inout_re, data_type *inout_im, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, data_type* inout_re, data_type* inout_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->forward_ip_rr(desc, reinterpret_cast(inout_re), - reinterpret_cast(inout_im), dependencies); + return get_commit(desc)->forward_ip_rr(desc, reinterpret_cast(inout_re), + reinterpret_cast(inout_im), dependencies); } //Out-of-place transform template -sycl::event compute_forward(descriptor_type &desc, input_type *in, output_type *out, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, input_type* in, output_type* out, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); 
static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using fwd_type = typename detail::descriptor_info::forward_type; using bwd_type = typename detail::descriptor_info::backward_type; - return get_commit(desc)->forward_op_cc(desc, reinterpret_cast(in), - reinterpret_cast(out), dependencies); + return get_commit(desc)->forward_op_cc(desc, reinterpret_cast(in), + reinterpret_cast(out), dependencies); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -sycl::event compute_forward(descriptor_type &desc, input_type *in_re, input_type *in_im, - output_type *out_re, output_type *out_im, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, input_type* in_re, input_type* in_im, + output_type* out_re, output_type* out_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->forward_op_rr(desc, reinterpret_cast(in_re), - reinterpret_cast(in_im), - reinterpret_cast(out_re), - reinterpret_cast(out_im), dependencies); + return get_commit(desc)->forward_op_rr(desc, reinterpret_cast(in_re), + reinterpret_cast(in_im), + reinterpret_cast(out_re), + reinterpret_cast(out_im), dependencies); } } // namespace oneapi::math::dft diff --git a/include/oneapi/math/exceptions.hpp b/include/oneapi/math/exceptions.hpp index ef99a1fb9..76f37b90f 100644 --- a/include/oneapi/math/exceptions.hpp +++ b/include/oneapi/math/exceptions.hpp @@ -39,7 +39,7 @@ class exception : public std::exception { public: exception(const std::string& message) : std::exception(), msg_(message) {} - exception(const std::string &domain, const std::string &function, const std::string &info = "") + exception(const std::string& domain, const 
std::string& function, const std::string& info = "") : std::exception() { msg_ = std::string("oneMath: ") + domain + ((domain.length() != 0 && function.length() != 0) ? "/" : "") + function + @@ -48,7 +48,7 @@ class exception : public std::exception { : ""); } - const char *what() const noexcept override { + const char* what() const noexcept override { return msg_.c_str(); } }; @@ -56,8 +56,8 @@ class exception : public std::exception { class unsupported_device : public oneapi::math::exception { public: unsupported_device(const std::string& message) : exception(message) {} - unsupported_device(const std::string &domain, const std::string &function, - const sycl::device &device) + unsupported_device(const std::string& domain, const std::string& function, + const sycl::device& device) : oneapi::math::exception( domain, function, device.get_info() + " is not supported") {} @@ -66,15 +66,15 @@ class unsupported_device : public oneapi::math::exception { class host_bad_alloc : public oneapi::math::exception { public: host_bad_alloc(const std::string& message) : exception(message) {} - host_bad_alloc(const std::string &domain, const std::string &function) + host_bad_alloc(const std::string& domain, const std::string& function) : oneapi::math::exception(domain, function, "cannot allocate memory on host") {} }; class device_bad_alloc : public oneapi::math::exception { public: device_bad_alloc(const std::string& message) : exception(message) {} - device_bad_alloc(const std::string &domain, const std::string &function, - const sycl::device &device) + device_bad_alloc(const std::string& domain, const std::string& function, + const sycl::device& device) : oneapi::math::exception( domain, function, "cannot allocate memory on " + device.get_info()) {} @@ -83,33 +83,33 @@ class device_bad_alloc : public oneapi::math::exception { class unimplemented : public oneapi::math::exception { public: unimplemented(const std::string& message) : exception(message) {} - unimplemented(const 
std::string &domain, const std::string &function, - const std::string &info = "") + unimplemented(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::math::exception(domain, function, "function is not implemented " + info) {} }; class invalid_argument : public oneapi::math::exception { public: invalid_argument(const std::string& message) : exception(message) {} - invalid_argument(const std::string &domain, const std::string &function, - const std::string &info = "") + invalid_argument(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::math::exception(domain, function, "invalid argument " + info) {} }; class uninitialized : public oneapi::math::exception { public: uninitialized(const std::string& message) : exception(message) {} - uninitialized(const std::string &domain, const std::string &function, - const std::string &info = "") + uninitialized(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::math::exception(domain, function, - "handle/descriptor is not initialized " + info) {} + "handle/descriptor is not initialized " + info) {} }; class computation_error : public oneapi::math::exception { public: computation_error(const std::string& message) : exception(message) {} - computation_error(const std::string &domain, const std::string &function, - const std::string &info = "") + computation_error(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::math::exception( domain, function, "computation error" + ((info.length() != 0) ? 
(": " + info) : "")) {} @@ -118,10 +118,10 @@ class computation_error : public oneapi::math::exception { class batch_error : public oneapi::math::exception { public: batch_error(const std::string& message) : exception(message) {} - batch_error(const std::string &domain, const std::string &function, - const std::string &info = "") - : oneapi::math::exception(domain, function, - "batch error" + ((info.length() != 0) ? (": " + info) : "")) {} + batch_error(const std::string& domain, const std::string& function, + const std::string& info = "") + : oneapi::math::exception( + domain, function, "batch error" + ((info.length() != 0) ? (": " + info) : "")) {} }; } // namespace math diff --git a/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx index 175e4500e..62998cae2 100644 --- a/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx +++ b/include/oneapi/math/lapack/detail/cusolver/lapack_ct.hxx @@ -20,2293 +20,2309 @@ // Buffer APIs static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, 
sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + 
scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); + scratchpad_size); +} +static inline void 
getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, 
std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector 
selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + 
std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, 
- sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, 
scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, 
scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, 
sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, 
lda, b, ldb, - w, scratchpad, scratchpad_size); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void 
trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, 
sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, 
scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - 
oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - 
sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t 
batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, 
nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, 
stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - 
oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector 
selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); -} -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - 
std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + 
std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } // USM APIs static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - 
std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - 
double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, 
std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, 
std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - 
scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - 
std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = 
{}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } -static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, 
a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - 
scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - 
std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector 
selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo 
uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event syevd(backend_selector selector, 
+ oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - 
float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo 
uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, 
std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, 
scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, 
a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* 
c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, 
std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, 
std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { 
return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - 
std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { 
return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, 
dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, 
a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t 
stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, 
const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, 
double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + backend_selector selector, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + 
std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, 
tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float 
*scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t 
*group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, 
scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* 
b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, 
- scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + stride_a, tau, stride_tau, 
batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } // SCRATCHPAD APIs template std::int64_t gebrd_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::gebrd_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::cusolver::gebrd_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gerqf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return 
oneapi::math::lapack::cusolver::gerqf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::cusolver::gerqf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t geqrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::geqrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::cusolver::geqrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gesvd_scratchpad_size(backend_selector selector, @@ -2319,28 +2335,28 @@ std::int64_t gesvd_scratchpad_size(backend_selector selector, template std::int64_t getrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::getrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::cusolver::getrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t getri_scratchpad_size(backend_selector selector, std::int64_t n, std::int64_t lda) { return oneapi::math::lapack::cusolver::getri_scratchpad_size(selector.get_queue(), n, - lda); + lda); } template std::int64_t getrs_scratchpad_size(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return oneapi::math::lapack::cusolver::getrs_scratchpad_size(selector.get_queue(), - trans, n, nrhs, lda, ldb); + trans, n, nrhs, lda, ldb); } template std::int64_t heevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::heevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::math::lapack::cusolver::heevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template std::int64_t hegvd_scratchpad_size(backend_selector selector, std::int64_t itype, @@ -2352,49 
+2368,49 @@ std::int64_t hegvd_scratchpad_size(backend_selector selector, template std::int64_t hetrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::hetrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::hetrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t hetrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::hetrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::hetrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgbr_scratchpad_size(backend_selector selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::cusolver::orgbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::math::lapack::cusolver::orgbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t orgtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::orgtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::orgtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::cusolver::orgqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return oneapi::math::lapack::cusolver::orgqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ormrq_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, 
std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::cusolver::ormrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::cusolver::ormrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t ormqr_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::cusolver::ormqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::cusolver::ormqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t ormtr_scratchpad_size(backend_selector selector, @@ -2407,34 +2423,34 @@ std::int64_t ormtr_scratchpad_size(backend_selector selector, template std::int64_t potrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::potrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::potrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t potrs_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return oneapi::math::lapack::cusolver::potrs_scratchpad_size(selector.get_queue(), uplo, - n, nrhs, lda, ldb); + return oneapi::math::lapack::cusolver::potrs_scratchpad_size(selector.get_queue(), + uplo, n, nrhs, lda, ldb); } template std::int64_t potri_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::potri_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::potri_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t 
sytrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::sytrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::sytrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t syevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::syevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::math::lapack::cusolver::syevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template std::int64_t sygvd_scratchpad_size(backend_selector selector, std::int64_t itype, @@ -2446,8 +2462,8 @@ std::int64_t sygvd_scratchpad_size(backend_selector selector, template std::int64_t sytrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::sytrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::sytrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t trtrs_scratchpad_size(backend_selector selector, @@ -2461,36 +2477,36 @@ template std::int64_t ungbr_scratchpad_size(backend_selector selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::cusolver::ungbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::math::lapack::cusolver::ungbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t ungqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::cusolver::ungqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return 
oneapi::math::lapack::cusolver::ungqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ungtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::cusolver::ungtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::cusolver::ungtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t unmrq_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::cusolver::unmrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::cusolver::unmrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t unmqr_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::cusolver::unmqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::cusolver::unmqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t unmtr_scratchpad_size(backend_selector selector, @@ -2566,62 +2582,62 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector sel } template std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t 
getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t 
group_count, - std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::cusolver::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx index ae8e8a147..a3d4de61c 100644 --- a/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx +++ b/include/oneapi/math/lapack/detail/cusolver/onemath_lapack_cusolver.hxx @@ -19,1812 +19,1836 @@ // Buffer APIs -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void 
gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t 
nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue 
&queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> 
&scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void syevd(sycl::queue &queue, 
oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); - -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, 
std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer 
&a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t 
ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, 
std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, 
std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& 
ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, 
oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + 
std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void 
potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t 
ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + 
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, 
std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& 
ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + 
sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, 
std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); // USM APIs -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double 
*scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue 
&queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT 
sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); - -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, 
std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t 
stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, 
- std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t 
group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t 
*lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, 
std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, 
std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* 
ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, + 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t 
n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, 
double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, 
+ const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + 
std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t 
lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, 
double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch( + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + 
std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); // SCRATCHPAD APIs template -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::int64_t lda, std::int64_t ldu, - std::int64_t ldvt); +ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldu, std::int64_t ldvt); 
template -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); 
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, - std::int64_t m, std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, + std::int64_t m, std::int64_t n, std::int64_t k, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t 
ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t 
sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, - std::int64_t m, std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, + std::int64_t m, std::int64_t n, std::int64_t k, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + 
std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_ipiv, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_ipiv, - 
std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); template ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t stride_a, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t 
orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* 
ldb, std::int64_t group_count, std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT 
std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); diff --git a/include/oneapi/math/lapack/detail/lapack_loader.hpp b/include/oneapi/math/lapack/detail/lapack_loader.hpp index 7e0da894b..be5015257 100644 --- a/include/oneapi/math/lapack/detail/lapack_loader.hpp +++ b/include/oneapi/math/lapack/detail/lapack_loader.hpp @@ -38,2344 +38,2356 @@ namespace math { namespace lapack { namespace detail { -ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, 
- std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void 
getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, 
std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void heevd(oneapi::math::device libkey, sycl::queue &queue, 
oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void heevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormtr(oneapi::math::device 
libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - 
std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, 
std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmrq(oneapi::math::device libkey, sycl::queue 
&queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t 
stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void 
potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, - 
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, - std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue 
&queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies 
= {}); -ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, +ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void 
gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + 
std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(oneapi::math::device libkey, 
sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void 
heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrf(oneapi::math::device libkey, sycl::queue& queue, 
oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); 
+ONEMATH_EXPORT void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + 
std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(oneapi::math::device libkey, 
sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo 
uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, 
std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void 
unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, 
std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, 
std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event 
gesvd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event 
hetrd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event 
orgtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, 
sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, - 
oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(oneapi::math::device 
libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + 
std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double 
*w, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, 
double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - 
std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device 
libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::side side, 
oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT 
sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); 
-ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, 
std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, 
std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
+ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + 
std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + 
std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event 
hetrd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = 
{}); +ONEMATH_EXPORT sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, 
oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT 
sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, + 
oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, 
oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, 
std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, 
sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, + std::int64_t 
batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, 
std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, 
std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t 
group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, 
std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event getrs_batch( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event getrs_batch( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t 
stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event getrs_batch( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - 
std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, 
double *a, - std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - 
std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, + 
std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo 
uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, 
std::int64_t stride_a, float* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* 
group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch( + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); ONEMATH_EXPORT sycl::event ungqr_batch( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT 
sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue 
&queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t heevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, 
+std::int64_t hegvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t hetrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t hetrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t 
ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t 
sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ungbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ungtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t unmrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t 
unmqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t unmqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t unmtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t 
batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t 
*group_sizes); +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + 
std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t ungqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(oneapi::math::device libkey, + 
sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t 
geqrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldu, std::int64_t ldvt); -template <> -ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> +ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, + std::int64_t ldvt); +template <> ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, 
std::int64_t lda); + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t getri_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t n, - std::int64_t lda); + sycl::queue& queue, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t getri_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t n, - std::int64_t lda); + sycl::queue& queue, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, 
std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); +template <> ONEMATH_EXPORT std::int64_t getrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t getrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size>( - 
oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, +ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda); + sycl::queue& queue, + oneapi::math::generate vect, + std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - 
std::int64_t lda); + sycl::queue& queue, + oneapi::math::generate vect, + std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); -template <> -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, 
std::int64_t lda, - std::int64_t ldc); -template <> -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); -template <> -ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc); -template <> -ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); +template <> +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); +template <> +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); +template <> +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); +template <> +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, 
oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc); +template <> +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t 
n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potri_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t potri_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, 
oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::job jobz, +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::math::device 
libkey, - sycl::queue &queue, std::int64_t itype, +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, + sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda, std::int64_t ldb); +template <> ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + 
oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); 
template <> ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(oneapi::math::device libkey, + sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue 
&queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, - std::int64_t ldc); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, - std::int64_t ldc); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + 
oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose 
trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t 
n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, 
std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t 
ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t 
group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* 
lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t 
geqrf_batch_scratchpad_size(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t 
potrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* 
lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT 
std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); } //namespace detail } //namespace lapack } //namespace math diff --git a/include/oneapi/math/lapack/detail/lapack_rt.hpp b/include/oneapi/math/lapack/detail/lapack_rt.hpp index 8f7aa92f6..a84331ceb 100644 --- a/include/oneapi/math/lapack/detail/lapack_rt.hpp +++ b/include/oneapi/math/lapack/detail/lapack_rt.hpp @@ -38,2132 +38,2137 @@ namespace oneapi { namespace math { namespace lapack { -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue 
&queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& 
scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } 
-static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& 
scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +static inline void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +static inline void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +static inline void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t 
n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } 
-static inline void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +static inline void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +static inline void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +static inline void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + 
sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +static inline void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormrq(sycl::queue& queue, oneapi::math::side side, 
oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, 
std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t 
n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size) { +static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, 
scratchpad_size); } -static inline void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, 
sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline 
void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + 
std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static 
inline void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, 
scratchpad_size); } -static inline void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, +static inline void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, +static inline void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, +static inline void unmqr(sycl::queue& queue, oneapi::math::side side, 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, +static inline void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +static inline void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +static inline void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void geqrf_batch(sycl::queue& queue, 
std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void 
getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue 
&queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t 
lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - 
sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +static inline void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& 
scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size) { detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, 
scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event 
geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, 
std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, +static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, 
std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, +static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, + double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu, + double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, 
scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex 
*scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, +static inline sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, +static inline sycl::event hegvd(sycl::queue& queue, 
std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, +static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - 
std::complex *tau, std::complex *scratchpad, +static inline sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static 
inline sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + 
const std::vector& dependencies = {}) { return detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, 
c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + 
std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { 
return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, +static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue 
&queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
trtrs(sycl::queue &queue, oneapi::math::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { 
return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline 
sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, std::complex* a, std::int64_t lda, + 
std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, +static inline sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, 
oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event 
geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - 
std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, 
scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, 
std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline 
sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, 
std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, 
dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose 
*trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}) { +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - 
double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, 
+static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - 
std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, 
std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, 
scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline 
sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, 
stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::gebrd_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, 
std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::gerqf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::geqrf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { return detail::gesvd_scratchpad_size(get_device_id(queue), queue, jobu, jobvt, m, n, lda, ldu, ldvt); } template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { return detail::gesvd_scratchpad_size(get_device_id(queue), queue, jobu, jobvt, m, n, lda, ldu, ldvt); } template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::getrf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return detail::getri_scratchpad_size(get_device_id(queue), queue, n, lda); } template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, 
oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb) { return detail::getrs_scratchpad_size(get_device_id(queue), queue, trans, n, nrhs, lda, ldb); } template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::heevd_scratchpad_size(get_device_id(queue), queue, jobz, uplo, n, lda); } template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { return detail::hegvd_scratchpad_size(get_device_id(queue), queue, itype, jobz, uplo, n, lda, ldb); } template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::hetrd_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::hetrf_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t 
lda) { return detail::orgbr_scratchpad_size(get_device_id(queue), queue, vect, m, n, k, lda); } template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::orgtr_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::orgqr_scratchpad_size(get_device_id(queue), queue, m, n, k, lda); } template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::ormrq_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, lda, ldc); } template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::ormqr_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, @@ -2171,7 +2176,7 @@ std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, } template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2179,45 +2184,45 @@ std::int64_t ormtr_scratchpad_size(sycl::queue 
&queue, oneapi::math::side side, n, lda, ldc); } template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::potrf_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return detail::potrs_scratchpad_size(get_device_id(queue), queue, uplo, n, nrhs, lda, ldb); } template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::potri_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::sytrf_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::syevd_scratchpad_size(get_device_id(queue), queue, jobz, uplo, n, lda); } template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { 
return detail::sygvd_scratchpad_size(get_device_id(queue), queue, itype, jobz, uplo, n, lda, ldb); } template = nullptr> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::sytrd_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2226,31 +2231,31 @@ std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, } template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::ungbr_scratchpad_size(get_device_id(queue), queue, vect, m, n, k, lda); } template = nullptr> -std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::ungqr_scratchpad_size(get_device_id(queue), queue, m, n, k, lda); } template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::ungtr_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, 
oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::unmrq_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, lda, ldc); } template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::unmqr_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, @@ -2258,7 +2263,7 @@ std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, } template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2266,21 +2271,21 @@ std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, n, lda, ldc); } template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return detail::getrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return detail::getri_batch_scratchpad_size(get_device_id(queue), queue, n, lda, stride_a, stride_ipiv, batch_size); } template 
= nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, @@ -2290,30 +2295,30 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::trans batch_size); } template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::geqrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, stride_a, stride_tau, batch_size); } template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t stride_a, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return detail::potrf_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, lda, stride_a, batch_size); } template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size) { +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { return detail::potrs_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template = nullptr> -std::int64_t 
orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::orgqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, @@ -2321,68 +2326,68 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std } template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::ungqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::getrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::getri_batch_scratchpad_size(get_device_id(queue), queue, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, 
std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::getrs_batch_scratchpad_size(get_device_id(queue), queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::geqrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::orgqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, group_count, group_sizes); } template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::potrf_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t 
potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::potrs_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::ungqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx b/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx index 41e7e8753..1dd86d2ed 100644 --- a/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx +++ b/include/oneapi/math/lapack/detail/mkl_common/lapack_ct.hxx @@ -18,2315 +18,2345 @@ *******************************************************************************/ static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, 
tauq, taup, - scratchpad, scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, + taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, + taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, + taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, + taup, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, 
lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> 
&scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, + scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, + scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, + scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, + 
scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, 
scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size); + u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size); + u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size); + u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size); + u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size); + b, ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size); + b, ldb, w, scratchpad, scratchpad_size); } -static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t 
n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgbr(backend_selector 
selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { +static inline void orgtr(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size); + 
scratchpad, scratchpad_size); } -static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { +static inline void orgtr(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void ormtr(backend_selector selector, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormtr(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size); + lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormtr(backend_selector selector, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormtr(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + 
sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size); + lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormrq(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormrq(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(backend_selector selector, oneapi::math::side side, - 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormqr(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void ormqr(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, 
n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potri(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { 
oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potri(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potri(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potri(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } -static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t 
nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t 
ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size); + b, ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, 
oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size); + b, ldb, w, scratchpad, scratchpad_size); } -static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +static inline void sytrd(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrd(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) 
{ +static inline void sytrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { +static inline void sytrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - 
scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void trtrs(backend_selector selector, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size); + lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void trtrs(backend_selector selector, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size); + lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +static inline void trtrs(backend_selector selector, + oneapi::math::uplo uplo, 
oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size); + lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void trtrs(backend_selector selector, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size); + lda, b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(backend_selector selector, + oneapi::math::uplo uplo, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } -static inline void unmrq(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmrq(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmrq(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmrq(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t 
m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmqr(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(backend_selector selector, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmqr(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size); + tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(backend_selector selector, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmtr(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + 
sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size); + lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(backend_selector selector, oneapi::math::side side, - oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, +static inline void unmtr(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size); + lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, 
+ std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, 
std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static 
inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t 
scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + 
std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + 
std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, - tauq, taup, scratchpad, scratchpad_size, - dependencies); + tauq, taup, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, - tauq, taup, scratchpad, scratchpad_size, - dependencies); + tauq, taup, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, - tauq, taup, scratchpad, scratchpad_size, - dependencies); + tauq, taup, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, 
std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, - tauq, taup, scratchpad, scratchpad_size, - dependencies); + tauq, taup, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float 
*a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, 
std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, scratchpad, scratchpad_size, - dependencies); + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, scratchpad, scratchpad_size, - dependencies); + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, scratchpad, scratchpad_size, - dependencies); + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + 
std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, scratchpad, scratchpad_size, - dependencies); + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, - lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size, dependencies); + lda, s, u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, 
jobvt, m, n, a, - lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size, dependencies); + lda, s, u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, - lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size, dependencies); + lda, s, u, ldu, vt, ldvt, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, - lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size, dependencies); + lda, s, u, ldu, vt, ldvt, scratchpad, + 
scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, - w, scratchpad, scratchpad_size, dependencies); + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::heevd( + selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, - w, scratchpad, scratchpad_size, dependencies); + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::heevd( + selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, - lda, b, ldb, w, scratchpad, scratchpad_size, - dependencies); + std::int64_t n, std::complex* a, 
std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, + a, lda, b, ldb, w, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, - lda, b, ldb, w, scratchpad, scratchpad_size, - dependencies); + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, + a, lda, b, ldb, w, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, - tau, scratchpad, scratchpad_size, - dependencies); + tau, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, 
std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, - tau, scratchpad, scratchpad_size, - dependencies); + tau, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, 
std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, - a, lda, tau, c, ldc, scratchpad, - scratchpad_size, dependencies); + float* a, std::int64_t 
lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, + n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, - a, lda, tau, c, ldc, scratchpad, - scratchpad_size, dependencies); + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, + n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + 
scratchpad_size, dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, 
float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + 
oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size, - dependencies); + b, ldb, scratchpad, scratchpad_size, + 
dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size, - dependencies); + b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size, - dependencies); + b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size, - dependencies); + b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event syevd(backend_selector selector, 
oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, + double* a, std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, - w, scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::syevd( + selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, + float* a, std::int64_t lda, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, - w, scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::syevd( + selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, - lda, b, ldb, w, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, + a, lda, b, ldb, w, 
scratchpad, + scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, - lda, b, ldb, w, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, + a, lda, b, ldb, w, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, - tau, scratchpad, scratchpad_size, - dependencies); + tau, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, 
n, a, lda, d, e, - tau, scratchpad, scratchpad_size, - dependencies); + tau, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline 
sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, - nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, - nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, 
dependencies); + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, - nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, - nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies); + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const 
std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, 
dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline 
sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, + a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, - a, lda, tau, c, ldc, scratchpad, - scratchpad_size, dependencies); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, + n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t 
ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, - a, lda, tau, c, ldc, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, + n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, 
dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, 
scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( + selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( + selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( + selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, + 
scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch( + selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - 
std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event 
getri_batch(backend_selector selector, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, 
scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& 
dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline 
sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t 
*group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + 
std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + backend_selector selector, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, 
a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { 
return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, 
std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo 
uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, 
std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); @@ -2335,20 +2365,20 @@ static inline sycl::event ungqr_batch(backend_selector template = nullptr> std::int64_t gebrd_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size(selector.get_queue(), - m, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::gebrd_scratchpad_size( + selector.get_queue(), m, n, lda); } template = nullptr> std::int64_t gerqf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size(selector.get_queue(), - m, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::gerqf_scratchpad_size( + selector.get_queue(), m, n, lda); } template = nullptr> std::int64_t geqrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size(selector.get_queue(), - m, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::geqrf_scratchpad_size( + selector.get_queue(), m, n, lda); } template = nullptr> std::int64_t gesvd_scratchpad_size(backend_selector selector, @@ 
-2369,14 +2399,14 @@ std::int64_t gesvd_scratchpad_size(backend_selector sel template = nullptr> std::int64_t getrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size(selector.get_queue(), - m, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::getrf_scratchpad_size( + selector.get_queue(), m, n, lda); } template = nullptr> std::int64_t getri_scratchpad_size(backend_selector selector, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size(selector.get_queue(), - n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::getri_scratchpad_size( + selector.get_queue(), n, lda); } template = nullptr> std::int64_t getrs_scratchpad_size(backend_selector selector, @@ -2389,8 +2419,8 @@ template = nullp std::int64_t heevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::heevd_scratchpad_size(selector.get_queue(), - jobz, uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::heevd_scratchpad_size( + selector.get_queue(), jobz, uplo, n, lda); } template = nullptr> std::int64_t hegvd_scratchpad_size(backend_selector selector, @@ -2403,37 +2433,37 @@ std::int64_t hegvd_scratchpad_size(backend_selector sel template = nullptr> std::int64_t hetrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::hetrd_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::hetrd_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t hetrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return 
oneapi::math::lapack::LAPACK_BACKEND::hetrf_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::hetrf_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t orgbr_scratchpad_size(backend_selector selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::orgbr_scratchpad_size(selector.get_queue(), - vect, m, n, k, lda); + return oneapi::math::lapack::LAPACK_BACKEND::orgbr_scratchpad_size( + selector.get_queue(), vect, m, n, k, lda); } template = nullptr> std::int64_t orgtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::orgtr_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::orgtr_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t orgqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::orgqr_scratchpad_size(selector.get_queue(), - m, n, k, lda); + return oneapi::math::lapack::LAPACK_BACKEND::orgqr_scratchpad_size( + selector.get_queue(), m, n, k, lda); } template = nullptr> @@ -2464,8 +2494,8 @@ std::int64_t ormtr_scratchpad_size(backend_selector sel template = nullptr> std::int64_t potrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::potrf_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t potrs_scratchpad_size(backend_selector selector, @@ -2477,21 +2507,21 @@ std::int64_t potrs_scratchpad_size(backend_selector sel template = nullptr> 
std::int64_t potri_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::potri_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t sytrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::sytrf_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t syevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::syevd_scratchpad_size(selector.get_queue(), - jobz, uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::syevd_scratchpad_size( + selector.get_queue(), jobz, uplo, n, lda); } template = nullptr> std::int64_t sygvd_scratchpad_size(backend_selector selector, @@ -2504,8 +2534,8 @@ std::int64_t sygvd_scratchpad_size(backend_selector sel template = nullptr> std::int64_t sytrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::sytrd_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::sytrd_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> std::int64_t trtrs_scratchpad_size(backend_selector selector, @@ -2520,22 +2550,22 @@ template selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::ungbr_scratchpad_size(selector.get_queue(), - vect, m, n, k, lda); + return 
oneapi::math::lapack::LAPACK_BACKEND::ungbr_scratchpad_size( + selector.get_queue(), vect, m, n, k, lda); } template = nullptr> std::int64_t ungqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::ungqr_scratchpad_size(selector.get_queue(), - m, n, k, lda); + return oneapi::math::lapack::LAPACK_BACKEND::ungqr_scratchpad_size( + selector.get_queue(), m, n, k, lda); } template = nullptr> std::int64_t ungtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::LAPACK_BACKEND::ungtr_scratchpad_size(selector.get_queue(), - uplo, n, lda); + return oneapi::math::lapack::LAPACK_BACKEND::ungtr_scratchpad_size( + selector.get_queue(), uplo, n, lda); } template = nullptr> @@ -2631,64 +2661,64 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector = nullptr> std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template = nullptr> std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - 
std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template = nullptr> std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t 
group_count, std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx b/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx index 5c1bdb095..15f59a7c7 100644 --- a/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx +++ b/include/oneapi/math/lapack/detail/mkl_common/onemath_lapack_backends.hxx @@ -17,2123 +17,2153 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer 
&e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t 
scratchpad_size); -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, 
std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, 
sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, 
sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - 
sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - 
oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, 
std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, 
oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t 
lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - 
sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, 
std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, - std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t 
n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, 
oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - 
std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormtr(sycl::queue 
&queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, 
std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, 
float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, 
float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, 
std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = 
{}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - 
float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, 
double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - 
std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, 
std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - 
std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo 
*uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMATH_EXPORT 
sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + 
std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, 
sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void 
potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); 
+ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void 
unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, 
std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& 
ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, 
+ sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
+ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, + std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + float* a, 
std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, 
std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t 
n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, 
oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t 
lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, 
std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t 
stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event 
getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, 
std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, 
std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t 
ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT 
sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, 
+ const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event potrs_batch( + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& 
dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t 
lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda); +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, 
std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, 
oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t 
stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t stride_a, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& 
queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* 
group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t 
n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t 
geqrf_scratchpad_size>(sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, - oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldu, std::int64_t ldvt); +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> +ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size( + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); +template <> ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size>( - sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size>( - sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> -ONEMATH_EXPORT 
std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue &queue, - std::int64_t m, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue& queue, + std::int64_t m, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue &queue, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, - oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue& queue, + std::int64_t n, + std::int64_t lda); template <> 
-ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, + oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); +template <> ONEMATH_EXPORT std::int64_t getrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t getrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue &queue, - oneapi::math::job jobz, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue& queue, + oneapi::math::job jobz, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size>( - sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size>( - sycl::queue &queue, std::int64_t itype, 
oneapi::math::job jobz, oneapi::math::uplo uplo, + sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, + oneapi::math::generate vect, + std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, + oneapi::math::generate vect, + 
std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size( + sycl::queue& 
queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, + oneapi::math::side side, + oneapi::math::uplo uplo, + oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, + 
oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); +template <> +ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t potrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); -template <> -ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue 
&queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo 
uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, + oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda, std::int64_t ldb); template <> -ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, 
oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size( + sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); +template <> +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size( + sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size>( - sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size>( - 
sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, - std::int64_t lda); -template <> -ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template <> +ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, + std::int64_t lda); +template <> ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose 
trans, std::int64_t m, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size>( - sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_ipiv, - std::int64_t batch_size); -template <> -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); +template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_ipiv, - std::int64_t batch_size); -template <> -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); +template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, 
std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); -template <> -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + 
std::int64_t stride_tau, + std::int64_t batch_size); +template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, - oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t batch_size); -template <> -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t batch_size); +template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - 
sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, 
std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); +template <> +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t 
*group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); +template <> +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, 
std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, 
std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); +template <> +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); -template <> -ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); +template <> +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, - oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, + std::int64_t group_count, 
+ std::int64_t* group_sizes); template <> -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, - oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); +template <> +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t 
*group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); diff --git a/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx index 40468215c..f9db456a9 100644 --- 
a/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx +++ b/include/oneapi/math/lapack/detail/rocsolver/lapack_ct.hxx @@ -22,2293 +22,2320 @@ // Buffer APIs static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t 
m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> 
&scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - 
scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, 
+ sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - 
std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); + scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, 
+ oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t 
n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void hetrf(backend_selector selector, 
oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + 
scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { + oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - 
oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void potrf(backend_selector 
selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector 
selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::uplo 
uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - 
oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer 
&scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, 
lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static 
inline void unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), 
side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { 
+ sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, 
stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void 
getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + 
sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, 
batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, 
std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) 
{ oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); -} -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void 
potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { - oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } // USM APIs static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, 
double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return 
oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return 
oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector 
selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + std::complex* a, 
std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::math::transpose trans, std::int64_t 
n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, + s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, + s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, + s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, - oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); -} -static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, + s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event heevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, + lda, b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, + lda, b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + 
double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, 
scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, 
scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + 
return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, 
std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, 
std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event syevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, 
oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, + lda, b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, + lda, b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static 
inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t 
nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, 
std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies 
= {}) { + return oneapi::math::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - 
tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, tau, stride_tau, batch_size, + 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* 
group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, ipiv, stride_ipiv, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, ipiv, stride_ipiv, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, ipiv, stride_ipiv, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) 
{ + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, + stride_a, ipiv, stride_ipiv, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event 
getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, 
batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, 
dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + 
std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* 
scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + 
float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, + lda, ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, + lda, ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, 
std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, + lda, ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + backend_selector selector, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, + lda, ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, 
dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, float *a, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo uplo, std::int64_t n, double *a, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { 
return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + 
oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, 
ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + 
std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, 
scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t 
*group_sizes, - std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::math::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } // SCRATCHPAD APIs template std::int64_t gebrd_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::gebrd_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::rocsolver::gebrd_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gerqf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::gerqf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::rocsolver::gerqf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t geqrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::geqrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::rocsolver::geqrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gesvd_scratchpad_size(backend_selector selector, @@ -2321,82 +2348,83 @@ std::int64_t gesvd_scratchpad_size(backend_selector selector template std::int64_t getrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, 
std::int64_t lda) { - return oneapi::math::lapack::rocsolver::getrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::math::lapack::rocsolver::getrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t getri_scratchpad_size(backend_selector selector, std::int64_t n, std::int64_t lda) { return oneapi::math::lapack::rocsolver::getri_scratchpad_size(selector.get_queue(), n, - lda); + lda); } template std::int64_t getrs_scratchpad_size(backend_selector selector, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return oneapi::math::lapack::rocsolver::getrs_scratchpad_size(selector.get_queue(), - trans, n, nrhs, lda, ldb); + return oneapi::math::lapack::rocsolver::getrs_scratchpad_size( + selector.get_queue(), trans, n, nrhs, lda, ldb); } template std::int64_t heevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::heevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::math::lapack::rocsolver::heevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template -std::int64_t hegvd_scratchpad_size(backend_selector selector, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb) { +std::int64_t hegvd_scratchpad_size(backend_selector selector, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb) { return oneapi::math::lapack::rocsolver::hegvd_scratchpad_size( selector.get_queue(), itype, jobz, uplo, n, lda, ldb); } template std::int64_t hetrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::hetrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return 
oneapi::math::lapack::rocsolver::hetrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t hetrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::hetrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::hetrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgbr_scratchpad_size(backend_selector selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::orgbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::math::lapack::rocsolver::orgbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t orgtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::orgtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::orgtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::orgqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return oneapi::math::lapack::rocsolver::orgqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ormrq_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::rocsolver::ormrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::rocsolver::ormrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t 
ormqr_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::rocsolver::ormqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::rocsolver::ormqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t ormtr_scratchpad_size(backend_selector selector, @@ -2409,47 +2437,48 @@ std::int64_t ormtr_scratchpad_size(backend_selector selector template std::int64_t potrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::potrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::potrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t potrs_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return oneapi::math::lapack::rocsolver::potrs_scratchpad_size(selector.get_queue(), uplo, - n, nrhs, lda, ldb); + return oneapi::math::lapack::rocsolver::potrs_scratchpad_size(selector.get_queue(), + uplo, n, nrhs, lda, ldb); } template std::int64_t potri_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::potri_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::potri_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t sytrf_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::sytrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::sytrf_scratchpad_size(selector.get_queue(), + uplo, 
n, lda); } template std::int64_t syevd_scratchpad_size(backend_selector selector, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::syevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::math::lapack::rocsolver::syevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template -std::int64_t sygvd_scratchpad_size(backend_selector selector, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb) { +std::int64_t sygvd_scratchpad_size(backend_selector selector, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb) { return oneapi::math::lapack::rocsolver::sygvd_scratchpad_size( selector.get_queue(), itype, jobz, uplo, n, lda, ldb); } template std::int64_t sytrd_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::sytrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::sytrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t trtrs_scratchpad_size(backend_selector selector, @@ -2463,36 +2492,36 @@ template std::int64_t ungbr_scratchpad_size(backend_selector selector, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::ungbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::math::lapack::rocsolver::ungbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t ungqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::ungqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return 
oneapi::math::lapack::rocsolver::ungqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ungtr_scratchpad_size(backend_selector selector, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::math::lapack::rocsolver::ungtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::math::lapack::rocsolver::ungtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t unmrq_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::rocsolver::unmrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::rocsolver::unmrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t unmqr_scratchpad_size(backend_selector selector, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::math::lapack::rocsolver::unmqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::math::lapack::rocsolver::unmqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t unmtr_scratchpad_size(backend_selector selector, @@ -2568,62 +2597,62 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector se } template std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t 
getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t 
group_count, - std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::math::lapack::rocsolver::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx index 11a9fe326..f8ebfd700 100644 --- a/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx +++ b/include/oneapi/math/lapack/detail/rocsolver/onemath_lapack_rocsolver.hxx @@ -21,1815 +21,1836 @@ // Buffer APIs -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void 
gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t 
nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void gesvd(sycl::queue 
&queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> 
&scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); - -ONEMATH_EXPORT void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void syevd(sycl::queue &queue, 
oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); - -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, 
std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer 
&a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t 
ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, 
std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - -ONEMATH_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, - std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, 
std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& 
ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void heevd(sycl::queue& queue, oneapi::math::job jobz, 
oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + 
std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormrq(sycl::queue& queue, oneapi::math::side side, + 
oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void 
potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t 
ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + 
+ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, 
std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& 
ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + 
sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, 
std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +ONEMATH_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); // USM APIs -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double 
*scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue 
&queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT 
sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); - -ONEMATH_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, 
std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t 
stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event 
getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, 
- std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t 
group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t 
*lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, 
std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, 
std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - 
-ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* 
ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, + 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t 
n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, 
double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, 
+ const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + 
std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t 
lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, 
double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event 
orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT 
sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event potrs_batch( + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + 
std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); // SCRATCHPAD APIs template -ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::int64_t lda, std::int64_t ldu, - std::int64_t ldvt); +ONEMATH_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldu, std::int64_t ldvt); 
template -ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); 
+ONEMATH_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, - std::int64_t m, std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, + std::int64_t m, std::int64_t n, std::int64_t k, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda); +ONEMATH_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t 
ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb); +ONEMATH_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t 
sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb); +ONEMATH_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template -ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, - std::int64_t m, std::int64_t n, std::int64_t k, - std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, + std::int64_t m, std::int64_t n, std::int64_t k, + std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda); +ONEMATH_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + 
std::int64_t n, std::int64_t lda); template -ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t ldc); +ONEMATH_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t ldc); template -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_ipiv, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_ipiv, - 
std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, - oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_ipiv, std::int64_t ldb, - std::int64_t stride_b, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, - std::int64_t stride_a, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t stride_a, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, 
std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_tau, - std::int64_t batch_size); +ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size); template -ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template ONEMATH_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, 
std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMATH_EXPORT std::int64_t 
ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMATH_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); diff --git a/include/oneapi/math/lapack/exceptions.hpp b/include/oneapi/math/lapack/exceptions.hpp index 33eddea4d..db9baf9d6 100644 --- a/include/oneapi/math/lapack/exceptions.hpp +++ b/include/oneapi/math/lapack/exceptions.hpp @@ -25,7 +25,7 @@ namespace lapack { class exception { public: - exception(oneapi::math::exception *_ex, std::int64_t info, std::int64_t detail = 0) + exception(oneapi::math::exception* _ex, std::int64_t info, std::int64_t detail = 0) : _info(info), _detail(detail), _ex(_ex) {} @@ -35,20 +35,20 @@ class exception { std::int64_t detail() const { return _detail; } - const char *what() const { + const char* what() const { return _ex->what(); } private: std::int64_t _info; std::int64_t _detail; - math::exception *_ex; + math::exception* _ex; }; class computation_error : public oneapi::math::computation_error, public oneapi::math::lapack::exception { public: - computation_error(const std::string &function, const std::string &info, std::int64_t code) + computation_error(const std::string& function, const std::string& info, std::int64_t code) : oneapi::math::computation_error("LAPACK", function, info), oneapi::math::lapack::exception(this, code) {} using oneapi::math::computation_error::what; @@ -56,17 +56,17 @@ class computation_error : public oneapi::math::computation_error, class batch_error : public oneapi::math::batch_error, public oneapi::math::lapack::exception { public: - batch_error(const std::string &function, const std::string &info, std::int64_t num_errors, + batch_error(const std::string& function, const std::string& info, std::int64_t num_errors, 
std::vector ids = {}, std::vector exceptions = {}) : oneapi::math::batch_error("LAPACK", function, info), oneapi::math::lapack::exception(this, num_errors), _ids(ids), _exceptions(exceptions) {} using oneapi::math::batch_error::what; - const std::vector &ids() const { + const std::vector& ids() const { return _ids; } - const std::vector &exceptions() const { + const std::vector& exceptions() const { return _exceptions; } @@ -78,7 +78,7 @@ class batch_error : public oneapi::math::batch_error, public oneapi::math::lapac class invalid_argument : public oneapi::math::invalid_argument, public oneapi::math::lapack::exception { public: - invalid_argument(const std::string &function, const std::string &info, + invalid_argument(const std::string& function, const std::string& info, std::int64_t arg_position = 0, std::int64_t detail = 0) : oneapi::math::invalid_argument("LAPACK", function, info), oneapi::math::lapack::exception(this, arg_position, detail) {} diff --git a/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp b/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp index 874432e19..7aa400bf8 100644 --- a/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp +++ b/include/oneapi/math/rng/detail/curand/onemath_rng_curand.hpp @@ -75,13 +75,13 @@ namespace rng { namespace curand { ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10( sycl::queue queue, std::initializer_list seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a( sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp b/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp index 
ff875be2f..9848a5cee 100644 --- a/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp +++ b/include/oneapi/math/rng/detail/mklcpu/onemath_rng_mklcpu.hpp @@ -36,13 +36,13 @@ namespace rng { namespace mklcpu { ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10( sycl::queue queue, std::initializer_list seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a( sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp b/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp index 9fc40dfcd..f36fe3997 100644 --- a/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp +++ b/include/oneapi/math/rng/detail/mklgpu/onemath_rng_mklgpu.hpp @@ -36,13 +36,13 @@ namespace rng { namespace mklgpu { ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10( sycl::queue queue, std::initializer_list seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a( sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/math/rng/detail/rng_loader.hpp b/include/oneapi/math/rng/detail/rng_loader.hpp index 19de23529..1855d641e 100644 --- a/include/oneapi/math/rng/detail/rng_loader.hpp +++ b/include/oneapi/math/rng/detail/rng_loader.hpp @@ -38,16 +38,16 @@ namespace rng { namespace detail { ONEMATH_EXPORT engine_impl* create_philox4x32x10(oneapi::math::device libkey, 
sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMATH_EXPORT engine_impl* create_philox4x32x10(oneapi::math::device libkey, sycl::queue queue, - std::initializer_list seed); + std::initializer_list seed); ONEMATH_EXPORT engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); ONEMATH_EXPORT engine_impl* create_mrg32k3a(oneapi::math::device libkey, sycl::queue queue, - std::initializer_list seed); + std::initializer_list seed); } // namespace detail } // namespace rng diff --git a/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp b/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp index 4e2538800..901f618f0 100644 --- a/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp +++ b/include/oneapi/math/rng/detail/rocrand/onemath_rng_rocrand.hpp @@ -77,13 +77,13 @@ namespace rng { namespace rocrand { ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_philox4x32x10( sycl::queue queue, std::initializer_list seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); ONEMATH_EXPORT oneapi::math::rng::detail::engine_impl* create_mrg32k3a( sycl::queue queue, std::initializer_list seed); diff --git a/include/oneapi/math/rng/device/detail/exponential_impl.hpp b/include/oneapi/math/rng/device/detail/exponential_impl.hpp index f754fd548..5c89bf824 100644 --- a/include/oneapi/math/rng/device/detail/exponential_impl.hpp +++ b/include/oneapi/math/rng/device/detail/exponential_impl.hpp @@ -126,7 +126,8 @@ class distribution_base oneapi::math::rng::device::poisson>; friend class distribution_base< oneapi::math::rng::device::poisson>; - friend class distribution_base>; + friend class distribution_base< + 
oneapi::math::rng::device::gamma>; friend class distribution_base< oneapi::math::rng::device::gamma>; friend class distribution_base< diff --git a/include/oneapi/math/rng/device/detail/gamma_impl.hpp b/include/oneapi/math/rng/device/detail/gamma_impl.hpp index f338f2705..816ce0ecb 100644 --- a/include/oneapi/math/rng/device/detail/gamma_impl.hpp +++ b/include/oneapi/math/rng/device/detail/gamma_impl.hpp @@ -250,7 +250,8 @@ class distribution_base> { typename std::conditional>::type { if (algorithm_ == gamma_algorithm::Exponential) { - distribution_base> distr_exp(a_, beta_); + distribution_base> distr_exp(a_, + beta_); return distr_exp.generate(engine); } sycl::vec res{}; @@ -262,7 +263,8 @@ class distribution_base> { template RealType generate_single(EngineType& engine) { if (algorithm_ == gamma_algorithm::Exponential) { - distribution_base> distr_exp(a_, beta_); + distribution_base> distr_exp(a_, + beta_); RealType z = distr_exp.generate_single(engine); return z; } diff --git a/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp b/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp index 6bca38b19..8b4d91967 100644 --- a/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp +++ b/include/oneapi/math/rng/device/detail/mrg32k3a_impl.hpp @@ -182,7 +182,7 @@ static inline void skip_ahead(engine_state(state.s, n, num_to_skip_ptr, skip_ahead_matrix[0]); @@ -190,7 +190,8 @@ static inline void skip_ahead(engine_state -static inline void validate_seed(engine_state>& state) { +static inline void validate_seed( + engine_state>& state) { int i; for (i = 0; i < 3; i++) { if (state.s[i] >= mrg32k3a_params::m1) { diff --git a/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp b/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp index 7ec22d816..ca133d22f 100644 --- a/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp +++ b/include/oneapi/math/rng/device/detail/philox4x32x10_impl.hpp @@ -119,8 +119,9 @@ static inline void 
round_10(std::uint32_t* cnt, std::uint32_t* k) { } template -static inline void skip_ahead(engine_state>& state, - std::uint64_t num_to_skip) { +static inline void skip_ahead( + engine_state>& state, + std::uint64_t num_to_skip) { std::uint64_t num_to_skip_tmp = num_to_skip; std::uint64_t c_inc; std::uint32_t counter[4]; @@ -157,8 +158,9 @@ static inline void skip_ahead(engine_state -static inline void skip_ahead(engine_state>& state, - std::uint64_t n, const std::uint64_t* num_to_skip_ptr) { +static inline void skip_ahead( + engine_state>& state, std::uint64_t n, + const std::uint64_t* num_to_skip_ptr) { constexpr std::uint64_t uint_max = 0xFFFFFFFFFFFFFFFF; std::uint64_t post_buffer, pre_buffer; std::int32_t num_elements = 0; diff --git a/include/oneapi/math/rng/distributions.hpp b/include/oneapi/math/rng/distributions.hpp index 0af2b80e0..f20e1333f 100644 --- a/include/oneapi/math/rng/distributions.hpp +++ b/include/oneapi/math/rng/distributions.hpp @@ -76,7 +76,7 @@ class uniform { explicit uniform(Type a, Type b) : a_(a), b_(b) { if (a >= b) { throw oneapi::math::invalid_argument("rng", "uniform", - "parameters are incorrect, a >= b"); + "parameters are incorrect, a >= b"); } } @@ -104,7 +104,7 @@ class uniform { explicit uniform(std::int32_t a, std::int32_t b) : a_(a), b_(b) { if (a >= b) { throw oneapi::math::invalid_argument("rng", "uniform", - "parameters are incorrect, a >= b"); + "parameters are incorrect, a >= b"); } } @@ -161,7 +161,7 @@ class gaussian { explicit gaussian(RealType mean, RealType stddev) : mean_(mean), stddev_(stddev) { if (stddev <= static_cast(0.0)) { throw oneapi::math::invalid_argument("rng", "gaussian", - "stddev parameter is incorrect, stddev <= 0.0"); + "stddev parameter is incorrect, stddev <= 0.0"); } } diff --git a/include/oneapi/math/sparse_blas/detail/helper_types.hpp b/include/oneapi/math/sparse_blas/detail/helper_types.hpp index b3082b736..49c462d04 100644 --- a/include/oneapi/math/sparse_blas/detail/helper_types.hpp +++ 
b/include/oneapi/math/sparse_blas/detail/helper_types.hpp @@ -40,7 +40,7 @@ inline constexpr bool is_int_supported_v = template inline constexpr bool are_fp_int_supported_v = - is_fp_supported_v&& is_int_supported_v; + is_fp_supported_v && is_int_supported_v; } // namespace detail } // namespace sparse diff --git a/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx b/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx index 310a9a4e8..71e22a664 100644 --- a/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx +++ b/include/oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx @@ -22,196 +22,197 @@ // Dense vector template -ONEMATH_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, - std::int64_t size, sycl::buffer val); +ONEMATH_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, + std::int64_t size, sycl::buffer val); template -ONEMATH_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, - std::int64_t size, dataType *val); +ONEMATH_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, + std::int64_t size, dataType* val); template -ONEMATH_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, - std::int64_t size, sycl::buffer val); +ONEMATH_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, + std::int64_t size, sycl::buffer val); template -ONEMATH_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, - std::int64_t size, dataType *val); +ONEMATH_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, + std::int64_t size, dataType* val); -ONEMATH_EXPORT sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event release_dense_vector(sycl::queue& 
queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies = {}); // Dense matrix template -ONEMATH_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - layout dense_layout, sycl::buffer val); +ONEMATH_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, sycl::buffer val); template -ONEMATH_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - layout dense_layout, dataType *val); +ONEMATH_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, dataType* val); template -ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t ld, layout dense_layout, - sycl::buffer val); +ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t ld, layout dense_layout, + sycl::buffer val); template -ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t ld, layout dense_layout, dataType *val); +ONEMATH_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t ld, layout dense_layout, dataType* val); -ONEMATH_EXPORT sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& 
dependencies = {}); // COO matrix template -ONEMATH_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, sycl::buffer row_ind, - sycl::buffer col_ind, - sycl::buffer val); +ONEMATH_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); template -ONEMATH_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, indexType *row_ind, indexType *col_ind, - dataType *val); +ONEMATH_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType* row_ind, indexType* col_ind, + dataType* val); template -ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, - sycl::buffer row_ind, - sycl::buffer col_ind, - sycl::buffer val); +ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); template -ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val); +ONEMATH_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ind, + indexType* col_ind, dataType* val); // CSR matrix template -ONEMATH_EXPORT void 
init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, sycl::buffer row_ptr, - sycl::buffer col_ind, - sycl::buffer val); +ONEMATH_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, + sycl::buffer val); template -ONEMATH_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, indexType *row_ptr, indexType *col_ind, - dataType *val); +ONEMATH_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType* row_ptr, indexType* col_ind, + dataType* val); template -ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, - sycl::buffer row_ptr, - sycl::buffer col_ind, - sycl::buffer val); +ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ptr, + sycl::buffer col_ind, + sycl::buffer val); template -ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, - std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val); +ONEMATH_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val); // Common sparse matrix functions -ONEMATH_EXPORT sycl::event release_sparse_matrix(sycl::queue &queue, 
matrix_handle_t smhandle, - const std::vector &dependencies = {}); +ONEMATH_EXPORT sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies = {}); -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property); // SPMM -ONEMATH_EXPORT void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); - -ONEMATH_EXPORT sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT void spmm_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, - dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, std::size_t &temp_buffer_size); - -ONEMATH_EXPORT void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, sycl::buffer workspace); - -ONEMATH_EXPORT sycl::event spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, - dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, - 
const std::vector &dependencies = {}); +ONEMATH_EXPORT void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr); + +ONEMATH_EXPORT sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, std::size_t& temp_buffer_size); + +ONEMATH_EXPORT void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, sycl::buffer workspace); + +ONEMATH_EXPORT sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, + const std::vector& dependencies = {}); // SPMV -ONEMATH_EXPORT void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); - -ONEMATH_EXPORT sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT void spmv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, - const void 
*alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, - dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); - -ONEMATH_EXPORT void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, - dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, sycl::buffer workspace); - -ONEMATH_EXPORT sycl::event spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, - spmv_alg alg, spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, - dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr); + +ONEMATH_EXPORT sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size); + +ONEMATH_EXPORT void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, 
sycl::buffer workspace); + +ONEMATH_EXPORT sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void* beta, dense_vector_handle_t y_handle, + spmv_alg alg, spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, + const std::vector& dependencies = {}); // SPSV -ONEMATH_EXPORT void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); - -ONEMATH_EXPORT sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT void spsv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); - -ONEMATH_EXPORT void spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, - sycl::buffer workspace); - -ONEMATH_EXPORT sycl::event spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_vector_handle_t x_handle, - dense_vector_handle_t y_handle, spsv_alg alg, - spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}); - -ONEMATH_EXPORT sycl::event spsv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, - matrix_view A_view, matrix_handle_t A_handle, - 
dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); +ONEMATH_EXPORT void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr); + +ONEMATH_EXPORT sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + spsv_descr_t spsv_descr, std::size_t& temp_buffer_size); + +ONEMATH_EXPORT void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace); + +ONEMATH_EXPORT sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies = {}); + +ONEMATH_EXPORT sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector& dependencies = {}); diff --git a/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx index 5f28b1ecb..6d7b05d3d 100644 --- a/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx +++ b/include/oneapi/math/sparse_blas/detail/sparse_blas_ct.hxx @@ -27,14 +27,14 @@ // Dense vector template std::enable_if_t> init_dense_vector( - backend_selector selector, 
dense_vector_handle_t *p_dvhandle, + backend_selector selector, dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val) { BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); } template std::enable_if_t> init_dense_vector( - backend_selector selector, dense_vector_handle_t *p_dvhandle, - std::int64_t size, dataType *val) { + backend_selector selector, dense_vector_handle_t* p_dvhandle, + std::int64_t size, dataType* val) { BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); } @@ -47,20 +47,20 @@ std::enable_if_t> set_dense_vector_data( template std::enable_if_t> set_dense_vector_data( backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, - dataType *val) { + dataType* val) { BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); } inline sycl::event release_dense_vector(backend_selector selector, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_dense_vector(selector.get_queue(), dvhandle, dependencies); } // Dense matrix template std::enable_if_t> init_dense_matrix( - backend_selector selector, dense_matrix_handle_t *p_dmhandle, + backend_selector selector, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val) { BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, @@ -68,9 +68,9 @@ std::enable_if_t> init_dense_matrix( } template std::enable_if_t> init_dense_matrix( - backend_selector selector, dense_matrix_handle_t *p_dmhandle, + backend_selector selector, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, - dataType *val) { + dataType* val) { BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, dense_layout, val); } @@ -87,21 +87,21 @@ template std::enable_if_t> 
set_dense_matrix_data( backend_selector selector, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, - dataType *val) { + dataType* val) { BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, num_cols, ld, dense_layout, val); } inline sycl::event release_dense_matrix(backend_selector selector, dense_matrix_handle_t dmhandle, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_dense_matrix(selector.get_queue(), dmhandle, dependencies); } // COO matrix template std::enable_if_t> init_coo_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, @@ -109,9 +109,9 @@ std::enable_if_t> init_coo_m } template std::enable_if_t> init_coo_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val) { + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ind, + indexType* col_ind, dataType* val) { BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); } @@ -127,8 +127,8 @@ std::enable_if_t> set_coo_ma template std::enable_if_t> set_coo_matrix_data( backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val) { + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* 
row_ind, + indexType* col_ind, dataType* val) { BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); } @@ -136,7 +136,7 @@ std::enable_if_t> set_coo_ma // CSR matrix template std::enable_if_t> init_csr_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, @@ -144,9 +144,9 @@ std::enable_if_t> init_csr_m } template std::enable_if_t> init_csr_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val) { + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val) { BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); } @@ -162,8 +162,8 @@ std::enable_if_t> set_csr_ma template std::enable_if_t> set_csr_matrix_data( backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val) { + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val) { BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); } @@ -171,7 +171,7 @@ std::enable_if_t> set_csr_ma // Common sparse matrix functions inline sycl::event release_sparse_matrix(backend_selector selector, matrix_handle_t smhandle, - const std::vector 
&dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_sparse_matrix(selector.get_queue(), smhandle, dependencies); } @@ -182,30 +182,30 @@ inline bool set_matrix_property(backend_selector selector, // SPMM inline void init_spmm_descr(backend_selector selector, - spmm_descr_t *p_spmm_descr) { + spmm_descr_t* p_spmm_descr) { BACKEND::init_spmm_descr(selector.get_queue(), p_spmm_descr); } inline sycl::event release_spmm_descr(backend_selector selector, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spmm_descr(selector.get_queue(), spmm_descr, dependencies); } inline void spmm_buffer_size(backend_selector selector, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { BACKEND::spmm_buffer_size(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, temp_buffer_size); } inline void spmm_optimize(backend_selector selector, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, matrix_view A_view, + oneapi::math::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace); @@ -213,48 +213,48 @@ inline void spmm_optimize(backend_selector selector, 
oneapi::m inline sycl::event spmm_optimize(backend_selector selector, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}) { + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}) { return BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace, dependencies); } inline sycl::event spmm(backend_selector selector, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::transpose opB, const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::spmm(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies); } // SPMV inline void init_spmv_descr(backend_selector selector, - spmv_descr_t *p_spmv_descr) { + spmv_descr_t* p_spmv_descr) { BACKEND::init_spmv_descr(selector.get_queue(), p_spmv_descr); } inline sycl::event release_spmv_descr(backend_selector selector, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spmv_descr(selector.get_queue(), spmv_descr, dependencies); } inline void spmv_buffer_size(backend_selector selector, - oneapi::math::transpose opA, const void *alpha, matrix_view A_view, + 
oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) { BACKEND::spmv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, temp_buffer_size); } inline void spmv_optimize(backend_selector selector, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace) { BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, @@ -262,47 +262,47 @@ inline void spmv_optimize(backend_selector selector, oneapi::m } inline sycl::event spmv_optimize(backend_selector selector, - oneapi::math::transpose opA, const void *alpha, matrix_view A_view, + oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies = {}) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies = {}) { return BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace, dependencies); } inline sycl::event spmv(backend_selector selector, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - 
dense_vector_handle_t x_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::spmv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); } // SPSV inline void init_spsv_descr(backend_selector selector, - spsv_descr_t *p_spsv_descr) { + spsv_descr_t* p_spsv_descr) { BACKEND::init_spsv_descr(selector.get_queue(), p_spsv_descr); } inline sycl::event release_spsv_descr(backend_selector selector, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spsv_descr(selector.get_queue(), spsv_descr, dependencies); } inline void spsv_buffer_size(backend_selector selector, - oneapi::math::transpose opA, const void *alpha, matrix_view A_view, + oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { BACKEND::spsv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, temp_buffer_size); } inline void spsv_optimize(backend_selector selector, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace) { @@ -311,20 +311,20 @@ inline void spsv_optimize(backend_selector selector, oneapi::m } inline sycl::event spsv_optimize(backend_selector selector, - oneapi::math::transpose opA, const void *alpha, matrix_view 
A_view, + oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, - spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}) { + spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies = {}) { return BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace, dependencies); } inline sycl::event spsv(backend_selector selector, oneapi::math::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::spsv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies); } diff --git a/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp index df3341814..8066691e5 100644 --- a/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp +++ b/include/oneapi/math/sparse_blas/detail/sparse_blas_rt.hpp @@ -30,176 +30,176 @@ namespace sparse { // Dense vector template std::enable_if_t> init_dense_vector( - sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::queue& queue, dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val); template std::enable_if_t> init_dense_vector( - sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, dataType *val); + sycl::queue& queue, dense_vector_handle_t* p_dvhandle, std::int64_t size, dataType* val); template std::enable_if_t> set_dense_vector_data( - sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::queue& queue, dense_vector_handle_t 
dvhandle, std::int64_t size, sycl::buffer val); template std::enable_if_t> set_dense_vector_data( - sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType *val); + sycl::queue& queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType* val); -sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}); +sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies = {}); // Dense matrix template std::enable_if_t> init_dense_matrix( - sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, + sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template std::enable_if_t> init_dense_matrix( - sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType* val); template std::enable_if_t> set_dense_matrix_data( - sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template std::enable_if_t> set_dense_matrix_data( - sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType* val); -sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector &dependencies = {}); +sycl::event 
release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& dependencies = {}); // COO matrix template std::enable_if_t> init_coo_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> init_coo_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ind, indexType* col_ind, dataType* val); template std::enable_if_t> set_coo_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> set_coo_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ind, indexType* col_ind, dataType* val); // CSR matrix template std::enable_if_t> init_csr_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer 
val); template std::enable_if_t> init_csr_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ptr, indexType* col_ind, dataType* val); template std::enable_if_t> set_csr_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> set_csr_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ptr, indexType* col_ind, dataType* val); // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, - const std::vector &dependencies = {}); +sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies = {}); -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property); // SPMM -void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); +void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr); -sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); +sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t 
spmm_descr, + const std::vector& dependencies = {}); -void spmm_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, +void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); -void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace); -sycl::event spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, matrix_view A_view, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}); + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}); -sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t 
A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SPMV -void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); +void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr); -sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); +sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies = {}); -void spmv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size); -void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace); -sycl::event spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view 
A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - void *workspace, const std::vector &dependencies = {}); + void* workspace, const std::vector& dependencies = {}); -sycl::event spmv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, const std::vector &dependencies = {}); + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector& dependencies = {}); // SPSV -void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); +void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr); -sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); +sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies = {}); -void spsv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); -void spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace); -sycl::event 
spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}); + spsv_alg alg, spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies = {}); -sycl::event spsv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); } // namespace sparse } // namespace math diff --git a/include/oneapi/mkl/blas.hpp b/include/oneapi/mkl/blas.hpp index a8befa71e..71b2fed1e 100644 --- a/include/oneapi/mkl/blas.hpp +++ b/include/oneapi/mkl/blas.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_BLAS_HPP // Deprecated header is planned to be removed late 2025. -#pragma message("Header `oneapi/mkl/blas.hpp` is deprecated, please use `oneapi/math/blas.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/blas.hpp` is deprecated, please use `oneapi/math/blas.hpp` instead") #include "oneapi/math/blas.hpp" diff --git a/include/oneapi/mkl/dft.hpp b/include/oneapi/mkl/dft.hpp index 09ec86cc7..d86952974 100644 --- a/include/oneapi/mkl/dft.hpp +++ b/include/oneapi/mkl/dft.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_DFT_HPP // Deprecated header is planned to be removed late 2025. 
-#pragma message("Header `oneapi/mkl/dft.hpp` is deprecated, please use `oneapi/math/dft.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/dft.hpp` is deprecated, please use `oneapi/math/dft.hpp` instead") #include "oneapi/math/dft.hpp" diff --git a/include/oneapi/mkl/lapack.hpp b/include/oneapi/mkl/lapack.hpp index 990511249..0a358f564 100644 --- a/include/oneapi/mkl/lapack.hpp +++ b/include/oneapi/mkl/lapack.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_LAPACK_HPP // Deprecated header is planned to be removed late 2025. -#pragma message("Header `oneapi/mkl/lapack.hpp` is deprecated, please use `oneapi/math/lapack.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/lapack.hpp` is deprecated, please use `oneapi/math/lapack.hpp` instead") #include "oneapi/math/lapack.hpp" diff --git a/include/oneapi/mkl/namespace_alias.hpp b/include/oneapi/mkl/namespace_alias.hpp index fa52bad2c..3b09b5867 100644 --- a/include/oneapi/mkl/namespace_alias.hpp +++ b/include/oneapi/mkl/namespace_alias.hpp @@ -18,8 +18,9 @@ **************************************************************************/ namespace oneapi { - // Deprecated namespace is planned to be removed late 2025. - namespace [[deprecated("Namespace `oneapi::mkl` is deprecated, please use `oneapi::math` instead")]] mkl { - using namespace math; - } +// Deprecated namespace is planned to be removed late 2025. +namespace + [[deprecated("Namespace `oneapi::mkl` is deprecated, please use `oneapi::math` instead")]] mkl { +using namespace math; } +} // namespace oneapi diff --git a/include/oneapi/mkl/rng.hpp b/include/oneapi/mkl/rng.hpp index 094a446a1..be4760dad 100644 --- a/include/oneapi/mkl/rng.hpp +++ b/include/oneapi/mkl/rng.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_RNG_HPP // Deprecated header is planned to be removed late 2025. 
-#pragma message("Header `oneapi/mkl/rng.hpp` is deprecated, please use `oneapi/math/rng.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/rng.hpp` is deprecated, please use `oneapi/math/rng.hpp` instead") #include "oneapi/math/rng.hpp" diff --git a/include/oneapi/mkl/rng/device.hpp b/include/oneapi/mkl/rng/device.hpp index 012649884..9024e066b 100644 --- a/include/oneapi/mkl/rng/device.hpp +++ b/include/oneapi/mkl/rng/device.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_RNG_DEVICE_HPP // Deprecated header is planned to be removed late 2025. -#pragma message("Header `oneapi/mkl/rng/device.hpp` is deprecated, please use `oneapi/math/rng/math.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/rng/device.hpp` is deprecated, please use `oneapi/math/rng/math.hpp` instead") #include "oneapi/math/rng/device.hpp" diff --git a/include/oneapi/mkl/sparse_blas.hpp b/include/oneapi/mkl/sparse_blas.hpp index 805f55a0a..1ec890462 100644 --- a/include/oneapi/mkl/sparse_blas.hpp +++ b/include/oneapi/mkl/sparse_blas.hpp @@ -21,7 +21,8 @@ #define ONEMATH_MKL_SPARSE_BLAS_HPP // Deprecated header is planned to be removed late 2025. 
-#pragma message("Header `oneapi/mkl/sparse_blas.hpp` is deprecated, please use `oneapi/math/sparse_blas.hpp` instead") +#pragma message( \ + "Header `oneapi/mkl/sparse_blas.hpp` is deprecated, please use `oneapi/math/sparse_blas.hpp` instead") #include "oneapi/math/sparse_blas.hpp" diff --git a/src/blas/backends/cublas/cublas_batch.cpp b/src/blas/backends/cublas/cublas_batch.cpp index dc53e0679..822817d6f 100644 --- a/src/blas/backends/cublas/cublas_batch.cpp +++ b/src/blas/backends/cublas/cublas_batch.cpp @@ -29,122 +29,122 @@ namespace column_major { // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, 
int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, float beta, 
sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t 
incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - 
sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + Ts beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { using cuTypeA = typename CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; @@ -153,7 +153,7 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran overflow_check(m, n, k, lda, ldb, ldc, stride_a, stride_b, stride_c, batch_size); cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::math::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -161,19 +161,19 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + 
onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T( - "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, - get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, &alpha, a_, - get_cublas_datatype(), lda, stride_a, b_, get_cublas_datatype(), - ldb, stride_b, &beta, c_, get_cublas_datatype(), ldc, stride_c, batch_size, - get_cublas_datatype(), cublas_gemm_algo); + CUBLAS_ERROR_FUNC_T("cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, + handle, get_cublas_operation(transa), get_cublas_operation(transb), + m, n, k, &alpha, a_, get_cublas_datatype(), lda, stride_a, + b_, get_cublas_datatype(), ldb, stride_b, &beta, c_, + get_cublas_datatype(), ldc, stride_c, batch_size, + get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, @@ -187,10 +187,10 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ 
gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, \ lda, stride_a, b, ldb, stride_b, beta, c, \ @@ -209,10 +209,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::complex, std::complex, std::com #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -225,307 +225,308 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, 
transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t 
k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", 
"omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose 
trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, 
int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t 
incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* 
incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event 
axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, 
int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double beta, double *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, 
int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } template -inline sycl::event gemv_batch(const char *func_name, Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(const char* func_name, Func func, sycl::queue& queue, + transpose* trans, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, const T** x, int64_t* incx, T* beta, T** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** y_ = 
reinterpret_cast(y); for (int64_t i = 0; i < group_count; i++) { - cublas_native_named_func( - func_name, func, err, handle, get_cublas_operation(trans[i]), - (int)m[i], (int)n[i], - (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], x_ + offset, (int)incx[i], - (cuDataType *)&beta[i], y_ + offset, (int)incy[i], (int)group_size[i]); + cublas_native_named_func(func_name, func, err, handle, + get_cublas_operation(trans[i]), (int)m[i], (int)n[i], + (cuDataType*)&alpha[i], a_ + offset, (int)lda[i], + x_ + offset, (int)incx[i], (cuDataType*)&beta[i], + y_ + offset, (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -533,14 +534,13 @@ inline sycl::event gemv_batch(const char *func_name, Func func, sycl::queue &que return done; } -#define GEMV_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, const TYPE **x, \ - int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ - return gemv_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, \ - x, incx, beta, y, incy, group_count, group_size, dependencies); \ +#define GEMV_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ + sycl::event gemv_batch( \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ + return gemv_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, \ + incx, beta, y, incy, group_count, group_size, dependencies); \ } GEMV_BATCH_LAUNCHER_USM(float, cublasSgemvBatched) @@ -550,72 +550,72 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, cublasZgemvBatched) #undef GEMV_BATCH_LAUNCHER_USM -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, 
int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, 
int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector 
&dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stride_a, - const Tb *b, int64_t ldb, int64_t stride_b, Ts beta, - Tc *c, int64_t ldc, int64_t stride_c, + Ts alpha, const Ta* a, int64_t lda, int64_t stride_a, + const Tb* b, int64_t ldb, int64_t stride_b, Ts beta, + Tc* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuTypeA = typename CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; using cuTypeC = typename CudaEquivalentType::Type; @@ -623,7 +623,7 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra overflow_check(m, n, k, lda, ldb, ldc, stride_a, stride_b, stride_c, batch_size); cublasGemmAlgo_t cublas_gemm_algo = 
CUBLAS_GEMM_DEFAULT; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::math::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -632,16 +632,16 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T( - "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, - get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, &alpha, a, - get_cublas_datatype(), lda, stride_a, b, get_cublas_datatype(), - ldb, stride_b, &beta, c, get_cublas_datatype(), ldc, stride_c, batch_size, - get_cublas_datatype(), cublas_gemm_algo); + CUBLAS_ERROR_FUNC_T("cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, + handle, get_cublas_operation(transa), get_cublas_operation(transb), + m, n, k, &alpha, a, get_cublas_datatype(), lda, stride_a, + b, get_cublas_datatype(), ldb, stride_b, &beta, c, + get_cublas_datatype(), ldc, stride_c, batch_size, + get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, @@ -656,11 +656,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C 
*c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, \ stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, \ batch_size, dependencies); \ @@ -678,11 +678,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, std::complex, std: #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -695,11 +695,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, 
const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using cuTypeA = typename CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; using cuTypeC = typename CudaEquivalentType::Type; @@ -709,7 +709,7 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::math::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -718,7 +718,7 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; @@ -727,10 +727,10 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr CUBLAS_ERROR_FUNC_T( "cublasGemmBatchedEx", cublasGemmBatchedEx, err, handle, get_cublas_operation(transa[i]), get_cublas_operation(transb[i]), (int)m[i], - (int)n[i], (int)k[i], &alpha[i], (const void *const *)(a + offset), - get_cublas_datatype(), (int)lda[i], (const void *const *)(b + offset), + (int)n[i], (int)k[i], &alpha[i], (const void* const*)(a + offset), + get_cublas_datatype(), (int)lda[i], (const void* const*)(b + offset), 
get_cublas_datatype(), (int)ldb[i], &beta[i], - (void *const *)(c + offset), get_cublas_datatype(), (int)ldc[i], + (void* const*)(c + offset), get_cublas_datatype(), (int)ldc[i], (int)group_size[i], get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( @@ -750,11 +750,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -771,11 +771,11 @@ GEMM_BATCH_LAUNCHER_USM(std::complex, std::complex, std::complex #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** 
c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -787,63 +787,63 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t 
lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } template -inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, T *alpha, const T **a, - int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(const char* func_name, Func func, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i 
< num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); cublas_native_named_func( func_name, func, err, handle, get_cublas_side_mode(left_right[i]), get_cublas_fill_mode(upper_lower[i]), get_cublas_operation(trans[i]), get_cublas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], - (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], + (cuDataType*)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], (int)group_size[i]); offset += group_size[i]; @@ -854,11 +854,11 @@ inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &que } #define TRSM_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, \ dependencies); \ @@ -871,208 +871,208 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, cublasZtrsmBatched) #undef TRSM_BATCH_LAUNCHER_USM -sycl::event syrk_batch(sycl::queue &queue, uplo 
*upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const 
std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, 
std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t 
ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); 
} -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t 
stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, 
int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t 
group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } @@ -1081,122 +1081,122 @@ namespace row_major { // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t 
stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void gemv_batch(sycl::queue 
&queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, +void 
gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t 
batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1214,377 +1214,377 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::complex, std::complex, std::com #undef GEMM_STRIDED_BATCH_LAUNCHER -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t 
batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void syrk_batch(sycl::queue& 
queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer 
&a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, 
int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, 
int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, 
int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); 
} -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event 
axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", 
"axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t 
stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double beta, double *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, 
int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, 
double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, 
int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const 
std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for 
row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1602,11 +1602,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, std::complex, std: #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const 
std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1623,51 +1623,51 @@ GEMM_BATCH_LAUNCHER_USM(std::complex, std::complex, std::complex #undef GEMM_BATCH_LAUNCHER_USM -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event 
trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } template -inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, T *alpha, const T **a, - int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(const char* func_name, Func func, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } #define TRSM_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue 
&queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, \ dependencies); \ @@ -1680,208 +1680,208 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, cublasZtrsmBatched) #undef TRSM_BATCH_LAUNCHER_USM -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const 
std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw 
unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose 
trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, 
const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t 
ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, 
int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, 
int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double 
*alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } diff --git a/src/blas/backends/cublas/cublas_extensions.cpp b/src/blas/backends/cublas/cublas_extensions.cpp index db6c0aa05..ab1157135 100644 --- a/src/blas/backends/cublas/cublas_extensions.cpp +++ b/src/blas/backends/cublas/cublas_extensions.cpp @@ -29,88 +29,88 @@ namespace column_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, 
int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float 
beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for 
column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -void omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? m : n); const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? 
n : m); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -122,16 +122,16 @@ OMATCOPY_LAUNCHER(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE 
alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, b, \ ldb, strideb); \ } @@ -142,53 +142,53 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -void omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &b, int64_t ldb, 
sycl::buffer &c, int64_t ldc) { +void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), m, n, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { \ omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, \ b, ldb, c, ldc); \ } @@ -202,95 +202,95 @@ OMATADD_LAUNCHER(std::complex, cublasZgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, 
transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* 
co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", 
"for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -sycl::event omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { 
cgh.depends_on(dependencies); const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? m : n); const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? n : m); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); return done; } #define OMATCOPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, \ ldb, dependencies); \ } @@ -303,16 +303,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", 
""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -323,58 +323,58 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw 
unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -inline sycl::event omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), m, n, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); return 
done; } #define OMATADD_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, \ lda, beta, b, ldb, c, ldc, dependencies); \ } @@ -392,88 +392,88 @@ namespace row_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", 
"gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, 
int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -void omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? n : m); const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? m : n); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -485,16 +485,16 @@ OMATCOPY_LAUNCHER(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - 
void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, b, \ ldb, strideb); \ } @@ -505,53 +505,53 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", 
"imatcopy", "for row_major layout"); } template -void omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), n, m, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), n, m, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& 
c, int64_t ldc) { \ omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, \ b, ldb, c, ldc); \ } @@ -565,95 +565,95 @@ OMATADD_LAUNCHER(std::complex, cublasZgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& 
queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose 
transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -sycl::event omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& 
dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); const int64_t logical_m = (trans == oneapi::math::transpose::nontrans ? n : m); const int64_t logical_n = (trans == oneapi::math::transpose::nontrans ? m : n); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, ldb, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, ldb, b_, ldb); }); }); return done; } #define OMATCOPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, \ ldb, dependencies); \ } @@ -666,16 +666,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& 
queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -686,58 +686,58 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector 
&dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline sycl::event omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), 
n, m, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), n, m, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); return done; } #define OMATADD_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, \ lda, beta, b, ldb, c, ldc, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_handle.hpp b/src/blas/backends/cublas/cublas_handle.hpp index 254c1c160..34a71b185 100644 --- a/src/blas/backends/cublas/cublas_handle.hpp +++ b/src/blas/backends/cublas/cublas_handle.hpp @@ -28,10 +28,10 @@ namespace cublas { template struct cublas_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t cublas_handle_mapper_{}; ~cublas_handle() noexcept(false) { - for (auto &handle_pair : cublas_handle_mapper_) { + for (auto& handle_pair : cublas_handle_mapper_) { cublasStatus_t err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/blas/backends/cublas/cublas_helper.hpp b/src/blas/backends/cublas/cublas_helper.hpp index 27b4a6bf9..a67d89126 100644 --- a/src/blas/backends/cublas/cublas_helper.hpp +++ b/src/blas/backends/cublas/cublas_helper.hpp @@ -81,7 +81,7 @@ void overflow_check(Index index, Next... 
indices) { class cublas_error : virtual public std::runtime_error { protected: - inline const char *cublas_error_map(cublasStatus_t error) { + inline const char* cublas_error_map(cublasStatus_t error) { switch (error) { case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; @@ -133,7 +133,7 @@ class cublas_error : virtual public std::runtime_error { class cuda_error : virtual public std::runtime_error { protected: - inline const char *cuda_error_map(CUresult result) { + inline const char* cuda_error_map(CUresult result) { switch (result) { case CUDA_SUCCESS: return "CUDA_SUCCESS"; case CUDA_ERROR_NOT_PERMITTED: return "CUDA_ERROR_NOT_PERMITTED"; @@ -206,23 +206,22 @@ class cuda_error : virtual public std::runtime_error { cuStreamSynchronize(currentStreamId); template -inline void cublas_native_func(Func func, cublasStatus_t err, - cublasHandle_t handle, Types... args) { +inline void cublas_native_func(Func func, cublasStatus_t err, cublasHandle_t handle, + Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC(func, err, handle, args...) + CUBLAS_ERROR_FUNC(func, err, handle, args...) #else - CUBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) + CUBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) #endif }; template -inline void cublas_native_named_func(const char *func_name, Func func, - cublasStatus_t err, cublasHandle_t handle, - Types... args) { +inline void cublas_native_named_func(const char* func_name, Func func, cublasStatus_t err, + cublasHandle_t handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, args...) + CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, args...) #else - CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, args...) + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, args...) 
#endif }; diff --git a/src/blas/backends/cublas/cublas_level1.cpp b/src/blas/backends/cublas/cublas_level1.cpp index 5b0e39978..830a1b1e0 100644 --- a/src/blas/backends/cublas/cublas_level1.cpp +++ b/src/blas/backends/cublas/cublas_level1.cpp @@ -32,16 +32,16 @@ namespace column_major { // Level 1 template -inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -49,8 +49,8 @@ inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // ASUM does not support negative index cublas_native_named_func(func_name, func, err, handle, n, x_, std::abs(incx), res_); @@ -63,8 +63,8 @@ inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define ASUM_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } ASUM_LAUNCHER(float, float, cublasSasum) @@ -74,26 +74,26 @@ ASUM_LAUNCHER(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER template -inline void scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - sycl::buffer &x, int64_t incx) { +inline void scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + sycl::buffer& x, int64_t incx) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; // SCAL does not support negative incx - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1*)&a, x_, std::abs(incx)); }); }); } #define SCAL_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void 
scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx); \ } SCAL_LAUNCHER(float, float, cublasSscal) @@ -105,27 +105,27 @@ SCAL_LAUNCHER(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER template -inline void axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, - incx, y_, incy); + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType*)&alpha, x_, incx, + y_, incy); }); }); } #define AXPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -135,39 +135,39 @@ AXPY_LAUNCHER(std::complex, cublasCaxpy) AXPY_LAUNCHER(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER -void 
axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +inline void rotg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template 
get_access(cgh); auto c_acc = c.template get_access(cgh); auto s_acc = s.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -175,10 +175,10 @@ inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buf // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); - auto s_ = sc.get_mem(s_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); + auto s_ = sc.get_mem(s_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, a_, b_, c_, s_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -190,8 +190,8 @@ inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buf } #define ROTG_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -202,16 +202,16 @@ ROTG_LAUNCHER(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER template -inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer ¶m) { +inline void rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& param) { using cuDataType = typename 
CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -219,9 +219,9 @@ inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto param_ = sc.get_mem(param_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto param_ = sc.get_mem(param_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -233,8 +233,8 @@ inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define ROTM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -243,17 +243,17 @@ ROTM_LAUNCHER(double, cublasDrotm) #undef ROTM_LAUNCHER template -inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void copy(const char* func_name, Func 
func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -261,8 +261,8 @@ inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define COPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -273,16 +273,16 @@ COPY_LAUNCHER(std::complex, cublasZcopy) #undef COPY_LAUNCHER template -inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { +inline void dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, 
[=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -290,9 +290,9 @@ inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -304,8 +304,8 @@ inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, } #define DOT_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } DOT_LAUNCHER(, float, cublasSdot) @@ -317,17 +317,17 @@ DOT_LAUNCHER(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER template -inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, +inline void rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; 
using cuDataType3 = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -335,18 +335,18 @@ inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. // cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + (cuDataType2*)&c, (cuDataType3*)&s); }); }); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -356,15 +356,15 @@ ROT_LAUNCHER(std::complex, float, float, cublasCsrot) ROT_LAUNCHER(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { 
overflow_check(n, incx, incy); // cuBLAS does not support sdot so we need to mimic sdot. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.get_access(cgh); auto y_acc = y.get_access(cgh); auto res_acc = result.get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -372,9 +372,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; cublas_native_func(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -388,23 +388,23 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, result.get_host_access(sycl::read_write)[0] += sb; } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, T y1, sycl::buffer& param) { using 
cuDataType = typename CudaEquivalentType::Type; sycl::buffer y1_buff(&y1, sycl::range<1>(1)); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto d1_acc = d1.template get_access(cgh); auto d2_acc = d2.template get_access(cgh); auto x1_acc = x1.template get_access(cgh); auto y1_acc = y1_buff.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -412,11 +412,11 @@ inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::bu // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto d1_ = sc.get_mem(d1_acc); - auto d2_ = sc.get_mem(d2_acc); - auto x1_ = sc.get_mem(x1_acc); - auto y1_ = sc.get_mem(y1_acc); - auto param_ = sc.get_mem(param_acc); + auto d1_ = sc.get_mem(d1_acc); + auto d2_ = sc.get_mem(d2_acc); + auto x1_ = sc.get_mem(x1_acc); + auto y1_ = sc.get_mem(y1_acc); + auto param_ = sc.get_mem(param_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -428,8 +428,8 @@ inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::bu } #define ROTMG_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -438,8 +438,8 @@ 
ROTMG_LAUNCHER(double, cublasDrotmg) #undef ROTMG_LAUNCHER template -inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -450,10 +450,10 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -461,8 +461,8 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); cublasStatus_t err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference netlib BLAS. 
@@ -474,7 +474,7 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -483,8 +483,8 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t } #define IAMAX_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMAX_LAUNCHER(float, cublasIsamax) @@ -494,17 +494,17 @@ IAMAX_LAUNCHER(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER template -inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -512,8 +512,8 @@ inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define SWAP_LAUNCHER(TYPE, 
CUBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -524,8 +524,8 @@ SWAP_LAUNCHER(std::complex, cublasZswap) #undef SWAP_LAUNCHER template -inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -536,10 +536,10 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -547,8 +547,8 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); cublasStatus_t err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented as a reference IAMIN. @@ -560,7 +560,7 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -569,8 +569,8 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t } #define IAMIN_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMIN_LAUNCHER(float, cublasIsamin) @@ -580,16 +580,16 @@ IAMIN_LAUNCHER(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER template -inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -597,8 +597,8 @@ inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // NRM2 does not support negative index cublas_native_named_func(func_name, func, err, handle, n, x_, std::abs(incx), res_); @@ -611,8 +611,8 @@ inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define NRM2_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } NRM2_LAUNCHER(float, float, cublasSnrm2) @@ -625,24 +625,24 @@ NRM2_LAUNCHER(std::complex, double, cublasDznrm2) // Level 1 template -inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -658,8 +658,8 @@ inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, in } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } ASUM_LAUNCHER_USM(float, float, cublasSasum) @@ -669,22 +669,22 @@ ASUM_LAUNCHER_USM(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - T2 *x, int64_t incx, const std::vector &dependencies) { +inline sycl::event scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + T2* x, int64_t incx, const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, 
[=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); cublasStatus_t err; // SCAL does not support negative incx - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1*)&a, x_, std::abs(incx)); }); }); @@ -692,8 +692,8 @@ inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, in } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } SCAL_LAUNCHER_USM(float, float, cublasSscal) @@ -705,31 +705,31 @@ SCAL_LAUNCHER_USM(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - const T *x, int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + const T* x, int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = 
reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, - incx, y_, incy); + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType*)&alpha, x_, incx, + y_, incy); }); }); return done; } #define AXPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, \ dependencies); \ } @@ -740,32 +740,32 @@ AXPY_LAUNCHER_USM(std::complex, cublasCaxpy) AXPY_LAUNCHER_USM(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const 
std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, - T1 *s, const std::vector &dependencies) { +inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1* a, T1* b, T2* c, + T1* s, const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; auto ctx = queue.get_context(); @@ -783,17 +783,17 @@ inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 "If any pointer is only device accessible, all must be device accessible"); } } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); - auto s_ = reinterpret_cast(s); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); if (results_on_device) { 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -808,8 +808,8 @@ inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -820,21 +820,21 @@ ROTG_LAUNCHER_USM(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, T *param, - const std::vector &dependencies) { +inline sycl::event rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, T* param, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto param_ = reinterpret_cast(param); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto param_ = reinterpret_cast(param); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, param_); }); @@ -843,8 +843,8 @@ inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, in } #define ROTM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - 
sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param, \ dependencies); \ } @@ -854,20 +854,20 @@ ROTM_LAUNCHER_USM(double, cublasDrotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event copy(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -876,8 +876,8 @@ inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, in } #define COPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const 
std::vector& dependencies) { \ return copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -888,23 +888,23 @@ COPY_LAUNCHER_USM(std::complex, cublasZcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - const int64_t incx, const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + const int64_t incx, const T* y, int64_t incy, T* result, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -919,9 +919,9 @@ inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int } #define DOT_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* 
result, \ + const std::vector& dependencies) { \ return dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result, \ dependencies); \ } @@ -934,34 +934,34 @@ DOT_LAUNCHER_USM(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER_USM template -inline sycl::event rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 *x, - const int64_t incx, T1 *y, int64_t incy, T2 c, T3 s, - const std::vector &dependencies) { +inline sycl::event rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1* x, + const int64_t incx, T1* y, int64_t incy, T2 c, T3 s, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; using cuDataType3 = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + (cuDataType2*)&c, (cuDataType3*)&s); }); }); return done; } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, 
incy, c, s, \ dependencies); \ } @@ -972,23 +972,23 @@ ROT_LAUNCHER_USM(std::complex, float, float, cublasCsrot) ROT_LAUNCHER_USM(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { overflow_check(n, incx, incy); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; // cuBLAS does not support sdsdot so we need to mimic sdot. - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1017,14 +1017,14 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 } } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline sycl::event rotmg(const char 
*func_name, Func func, sycl::queue &queue, T *d1, T *d2, T *x1, - T y1, T *param, const std::vector &dependencies) { +inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T* d1, T* d2, T* x1, + T y1, T* param, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; auto ctx = queue.get_context(); bool results_on_device = (sycl::get_pointer_type(d1, ctx) == sycl::usm::alloc::device || @@ -1039,22 +1039,22 @@ inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T "If any pointer is only device accessible, all must be device accessible"); } } - cuDataType *y1_; + cuDataType* y1_; if (results_on_device) { y1_ = sycl::malloc_device(1, queue); queue.memcpy(y1_, &y1, sizeof(cuDataType)).wait(); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto d1_ = reinterpret_cast(d1); - auto d2_ = reinterpret_cast(d2); - auto x1_ = reinterpret_cast(x1); - auto param_ = reinterpret_cast(param); + auto d1_ = reinterpret_cast(d1); + auto d2_ = reinterpret_cast(d2); + auto x1_ = reinterpret_cast(x1); + auto param_ = reinterpret_cast(param); cublasStatus_t err; if (results_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); @@ -1062,7 +1062,7 @@ inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST); } else { - auto y1_c = reinterpret_cast(&y1); + auto y1_c = reinterpret_cast(&y1); cublas_native_named_func(func_name, func, err, handle, d1_, d2_, x1_, y1_c, param_); } }); @@ -1076,8 +1076,8 @@ inline sycl::event rotmg(const char *func_name, 
Func func, sycl::queue &queue, T } #define ROTMG_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1086,9 +1086,9 @@ ROTMG_LAUNCHER_USM(double, cublasDrotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -1097,7 +1097,7 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i // This change may cause failure as the result of integer overflow // based on the size. 
int int_res = 0; - int *int_res_p = nullptr; + int* int_res_p = nullptr; bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; if (result_on_device) { @@ -1106,14 +1106,14 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i else { int_res_p = &int_res; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1128,7 +1128,7 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i }); done.wait(); if (result_on_device) { - auto last_ev = queue.submit([&](sycl::handler &cgh) { + auto last_ev = queue.submit([&](sycl::handler& cgh) { cgh.single_task([=]() { *result = std::max((int64_t)*int_res_p - 1, (int64_t)0); }); }); last_ev.wait(); @@ -1142,8 +1142,8 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i } #define IAMAX_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMAX_LAUNCHER_USM(float, cublasIsamax) @@ -1153,20 +1153,20 @@ IAMAX_LAUNCHER_USM(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(const char *func_name, Func 
func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -1175,8 +1175,8 @@ inline sycl::event swap(const char *func_name, Func func, sycl::queue &queue, in } #define SWAP_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1187,9 +1187,9 @@ SWAP_LAUNCHER_USM(std::complex, cublasZswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) 
{ using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -1198,7 +1198,7 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i // This change may cause failure as the result of integer overflow // based on the size. int int_res = 0; - int *int_res_p = nullptr; + int* int_res_p = nullptr; bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; if (result_on_device) { @@ -1207,14 +1207,14 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i else { int_res_p = &int_res; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1229,7 +1229,7 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i }); done.wait(); if (result_on_device) { - auto last_ev = queue.submit([&](sycl::handler &cgh) { + auto last_ev = queue.submit([&](sycl::handler& cgh) { cgh.single_task([=]() { *result = std::max((int64_t)*int_res_p - 1, (int64_t)0); }); }); last_ev.wait(); @@ -1243,8 +1243,8 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i } #define IAMIN_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t 
incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMIN_LAUNCHER_USM(float, cublasIsamin) @@ -1254,24 +1254,24 @@ IAMIN_LAUNCHER_USM(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1287,8 +1287,8 @@ inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, in } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return 
nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } NRM2_LAUNCHER_USM(float, float, cublasSnrm2) @@ -1304,14 +1304,14 @@ namespace row_major { // Level 1 template -inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "asum", "for row_major layout"); } #define ASUM_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } ASUM_LAUNCHER(float, float, cublasSasum) @@ -1321,13 +1321,13 @@ ASUM_LAUNCHER(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER template -inline void scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - sycl::buffer &x, int64_t incx) { +inline void scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "scal", "for row_major layout"); } #define SCAL_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx); \ } SCAL_LAUNCHER(float, float, cublasSscal) @@ -1339,14 +1339,14 @@ SCAL_LAUNCHER(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER template -inline void axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(const char* func_name, Func func, 
sycl::queue& queue, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpy", "for row_major layout"); } #define AXPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -1356,37 +1356,37 @@ AXPY_LAUNCHER(std::complex, cublasCaxpy) AXPY_LAUNCHER(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + 
sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +inline void rotg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { throw unimplemented("blas", "rotg", "for row_major layout"); } #define ROTG_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -1397,15 +1397,15 @@ ROTG_LAUNCHER(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER template -inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer ¶m) { +inline void rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& param) { throw unimplemented("blas", "rotm", "for row_major layout"); } #define ROTM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -1414,14 +1414,14 @@ ROTM_LAUNCHER(double, cublasDrotm) #undef ROTM_LAUNCHER template -inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void copy(const char* func_name, Func func, sycl::queue& queue, 
int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "copy", "for row_major layout"); } #define COPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1432,15 +1432,15 @@ COPY_LAUNCHER(std::complex, cublasZcopy) #undef COPY_LAUNCHER template -inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { +inline void dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } #define DOT_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } DOT_LAUNCHER(, float, cublasSdot) @@ -1452,15 +1452,15 @@ DOT_LAUNCHER(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER template -inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, +inline void rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { throw unimplemented("blas", "rot", "for row_major layout"); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - 
void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -1470,25 +1470,25 @@ ROT_LAUNCHER(std::complex, float, float, cublasCsrot) ROT_LAUNCHER(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "sdsdot", "for row_major layout"); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, T y1, sycl::buffer& param) { throw unimplemented("blas", "rotmg", "for row_major layout"); } #define ROTMG_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -1497,14 +1497,14 @@ ROTMG_LAUNCHER(double, cublasDrotmg) #undef 
ROTMG_LAUNCHER template -inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamax", "for row_major layout"); } #define IAMAX_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMAX_LAUNCHER(float, cublasIsamax) @@ -1514,14 +1514,14 @@ IAMAX_LAUNCHER(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER template -inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "swap", "for row_major layout"); } #define SWAP_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1532,14 +1532,14 @@ SWAP_LAUNCHER(std::complex, cublasZswap) #undef SWAP_LAUNCHER template -inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamin", "for 
row_major layout"); } #define IAMIN_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMIN_LAUNCHER(float, cublasIsamin) @@ -1549,14 +1549,14 @@ IAMIN_LAUNCHER(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER template -inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "nrm2", "for row_major layout"); } #define NRM2_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } NRM2_LAUNCHER(float, float, cublasSnrm2) @@ -1569,15 +1569,15 @@ NRM2_LAUNCHER(std::complex, double, cublasDznrm2) // Level 1 template -inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { throw unimplemented("blas", "asum", "for row_major layout"); } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const 
TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } ASUM_LAUNCHER_USM(float, float, cublasSasum) @@ -1587,14 +1587,14 @@ ASUM_LAUNCHER_USM(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - T2 *x, int64_t incx, const std::vector &dependencies) { +inline sycl::event scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + T2* x, int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "scal", "for row_major layout"); } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } SCAL_LAUNCHER_USM(float, float, cublasSscal) @@ -1606,15 +1606,15 @@ SCAL_LAUNCHER_USM(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - const T *x, int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + const T* x, int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpy", "for row_major layout"); } #define AXPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, 
int64_t incy, const std::vector& dependencies) { \ return axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, \ dependencies); \ } @@ -1625,38 +1625,38 @@ AXPY_LAUNCHER_USM(std::complex, cublasCaxpy) AXPY_LAUNCHER_USM(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for 
row_major layout"); } template -inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, - T1 *s, const std::vector &dependencies) { +inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1* a, T1* b, T2* c, + T1* s, const std::vector& dependencies) { throw unimplemented("blas", "rotg", "for row_major layout"); } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -1667,15 +1667,15 @@ ROTG_LAUNCHER_USM(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, T *param, - const std::vector &dependencies) { +inline sycl::event rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, T* param, + const std::vector& dependencies) { throw unimplemented("blas", "rotm", "for row_major layout"); } #define ROTM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param, \ dependencies); \ } @@ -1685,15 +1685,15 @@ ROTM_LAUNCHER_USM(double, cublasDrotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - int64_t incx, T *y, int64_t incy, - const 
std::vector &dependencies) { +inline sycl::event copy(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "copy", "for row_major layout"); } #define COPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1704,16 +1704,16 @@ COPY_LAUNCHER_USM(std::complex, cublasZcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - const int64_t incx, const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + const int64_t incx, const T* y, int64_t incy, T* result, + const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } #define DOT_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result, \ dependencies); \ } @@ -1726,16 +1726,16 @@ DOT_LAUNCHER_USM(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER_USM template -inline sycl::event rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 *x, - const int64_t incx, T1 
*y, int64_t incy, T2 c, T3 s, - const std::vector &dependencies) { +inline sycl::event rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1* x, + const int64_t incx, T1* y, int64_t incy, T2 c, T3 s, + const std::vector& dependencies) { throw unimplemented("blas", "rot", "for row_major layout"); } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, \ dependencies); \ } @@ -1746,26 +1746,26 @@ ROT_LAUNCHER_USM(std::complex, float, float, cublasCsrot) ROT_LAUNCHER_USM(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { throw unimplemented("blas", "sdsdot", "for row_major layout"); } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T *d1, T *d2, T *x1, - T y1, T *param, const std::vector &dependencies) { +inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T* d1, T* d2, T* 
x1, + T y1, T* param, const std::vector& dependencies) { throw unimplemented("blas", "rotmg", "for row_major layout"); } #define ROTMG_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1774,15 +1774,15 @@ ROTMG_LAUNCHER_USM(double, cublasDrotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamax", "for row_major layout"); } #define IAMAX_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMAX_LAUNCHER_USM(float, cublasIsamax) @@ -1792,15 +1792,15 @@ IAMAX_LAUNCHER_USM(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", 
"swap", "for row_major layout"); } #define SWAP_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1811,15 +1811,15 @@ SWAP_LAUNCHER_USM(std::complex, cublasZswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamin", "for row_major layout"); } #define IAMIN_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMIN_LAUNCHER_USM(float, cublasIsamin) @@ -1829,15 +1829,15 @@ IAMIN_LAUNCHER_USM(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { throw unimplemented("blas", "nrm2", "for row_major layout"); } #define NRM2_LAUNCHER_USM(TYPE1, 
TYPE2, CUBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } NRM2_LAUNCHER_USM(float, float, cublasSnrm2) diff --git a/src/blas/backends/cublas/cublas_level2.cpp b/src/blas/backends/cublas/cublas_level2.cpp index 1bbfbc60f..87adc7fab 100644 --- a/src/blas/backends/cublas/cublas_level2.cpp +++ b/src/blas/backends/cublas/cublas_level2.cpp @@ -31,32 +31,32 @@ namespace column_major { // Buffer APIs template -inline void gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, (cuDataType 
*)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, (cuDataType*)&alpha, a_, lda, x_, incx, (cuDataType*)&beta, + y_, incy); }); }); } #define GEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy); \ } @@ -68,32 +68,32 @@ GEMV_LAUNCHER(std::complex, cublasZgemv) #undef GEMV_LAUNCHER template -inline void gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, kl, ku, (cuDataType*)&alpha, a_, lda, x_, incx, + (cuDataType*)&beta, y_, incy); }); }); } #define GBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -105,30 +105,30 @@ GBMV_LAUNCHER(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER template -inline void ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = 
sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define GER_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -142,32 +142,32 @@ GER_LAUNCHER(c, std::complex, cublasZgerc) #undef GER_LAUNCHER template -inline void hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -177,32 +177,32 @@ HBMV_LAUNCHER(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER template -inline void hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = 
sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -212,31 +212,31 @@ HEMV_LAUNCHER(std::complex, cublasZhemv) #undef HEMV_LAUNCHER template -inline void her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a, int64_t lda) { +inline void her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a, int64_t lda) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_, lda); }); }); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -247,32 +247,32 @@ HER_LAUNCHER(double, std::complex, cublasZher) #undef HER_LAUNCHER template -inline void her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + 
get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define HER2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -283,32 +283,32 @@ HER2_LAUNCHER(std::complex, cublasZher2) #undef HER2_LAUNCHER template -inline void hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, 
(cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -319,30 +319,30 @@ HPMV_LAUNCHER(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER template -inline void hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a) { +inline void hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_); }); }); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - 
sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -352,32 +352,32 @@ HPR_LAUNCHER(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER template -inline void hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define HPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, 
int64_t incy, \ + sycl::buffer& a) { \ hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -387,32 +387,32 @@ HPR2_LAUNCHER(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE 
alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -423,32 +423,32 @@ SBMV_LAUNCHER(double, cublasDsbmv) #undef SBMV_LAUNCHER template -inline void symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SYMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& 
queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -459,28 +459,28 @@ SYMV_LAUNCHER(double, cublasDsymv) #undef SYMV_LAUNCHER template -inline void syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_, lda); }); }); } #define SYR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -492,32 +492,32 @@ SYR_LAUNCHER(std::complex, cublasZsyr) #undef SYR_LAUNCHER 
template -inline void syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define SYR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -531,32 +531,32 @@ SYR2_LAUNCHER(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER template -inline void spmv(const char *func_name, Func func, sycl::queue 
&queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -567,28 +567,28 @@ SPMV_LAUNCHER(double, cublasDspmv) #undef SPMV_LAUNCHER template -inline void spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T 
alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_); }); }); } #define SPR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -598,32 +598,32 @@ SPR_LAUNCHER(double, cublasDspr) #undef SPR_LAUNCHER template -inline void spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define SPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -633,18 +633,18 @@ SPR2_LAUNCHER(double, cublasDspr2) #undef SPR2_LAUNCHER template -inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template 
get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -654,8 +654,8 @@ inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -669,18 +669,18 @@ TBMV_LAUNCHER(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER template -inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -690,8 +690,8 @@ inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TBSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -705,18 +705,18 @@ TBSV_LAUNCHER(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER template -inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, 
get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -726,8 +726,8 @@ inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -740,18 +740,18 @@ TPMV_LAUNCHER(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER template -inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -761,8 +761,8 @@ inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TPSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -775,18 +775,18 @@ TPSV_LAUNCHER(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER template -inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -796,8 +796,8 @@ inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TRMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ 
trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -810,18 +810,18 @@ TRMV_LAUNCHER(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER template -inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -831,8 +831,8 @@ inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TRSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -847,35 +847,35 @@ TRSV_LAUNCHER(std::complex, cublasZtrsv) // USM APIs template -inline sycl::event gemv(const char 
*func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, (cuDataType*)&alpha, a_, lda, x_, incx, (cuDataType*)&beta, + y_, incy); }); }); return done; } #define GEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, 
queue, trans, m, n, alpha, a, lda, x, incx, \ beta, y, incy, dependencies); \ } @@ -887,36 +887,36 @@ GEMV_LAUNCHER_USM(std::complex, cublasZgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T *a, - int64_t lda, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T* a, + int64_t lda, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, kl, ku, (cuDataType*)&alpha, a_, lda, x_, incx, + (cuDataType*)&beta, y_, incy); }); }); return done; } #define GBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t 
incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -928,23 +928,23 @@ GBMV_LAUNCHER_USM(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, - T alpha, const T *x, int64_t incx, const T *y, int64_t incy, T *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, + T alpha, const T* x, int64_t incx, const T* y, int64_t incy, T* a, + int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -952,9 +952,9 @@ inline sycl::event ger(const char *func_name, Func func, sycl::queue 
&queue, int } #define GER_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -968,35 +968,35 @@ GER_LAUNCHER_USM(c, std::complex, cublasZgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), 
n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1006,34 +1006,34 @@ HBMV_LAUNCHER_USM(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); 
- auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1043,25 +1043,25 @@ HEMV_LAUNCHER_USM(std::complex, cublasZhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, int64_t lda, const std::vector& dependencies) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - 
onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_, lda); }); }); @@ -1069,9 +1069,9 @@ inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, upl } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -1082,24 +1082,24 @@ HER_LAUNCHER_USM(double, std::complex, cublasZher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t 
num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1107,9 +1107,9 @@ inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, up } #define HER2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -1120,34 +1120,34 @@ HER2_LAUNCHER_USM(std::complex, cublasZher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using cuDataType = 
typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -1158,24 +1158,24 @@ HPMV_LAUNCHER_USM(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, const std::vector &dependencies) { +inline sycl::event hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t 
n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, const std::vector& dependencies) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_); }); }); @@ -1183,9 +1183,9 @@ inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, upl } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -1196,24 +1196,24 @@ HPR_LAUNCHER_USM(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline 
sycl::event hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1221,9 +1221,9 @@ inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, up } #define HPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -1234,35 +1234,35 @@ HPR2_LAUNCHER_USM(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - 
int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define SBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, 
upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1273,34 +1273,34 @@ SBMV_LAUNCHER_USM(double, cublasDsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define SYMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, 
int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1311,23 +1311,23 @@ SYMV_LAUNCHER_USM(double, cublasDsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_, lda); }); }); @@ -1335,9 +1335,9 @@ inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, upl } #define SYR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event 
syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -1350,24 +1350,24 @@ SYR_LAUNCHER_USM(std::complex, cublasZsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1375,9 +1375,9 @@ inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, up } #define SYR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE 
alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -1391,34 +1391,34 @@ SYR2_LAUNCHER_USM(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); 
}); }); return done; } #define SPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -1429,23 +1429,23 @@ SPMV_LAUNCHER_USM(double, cublasDspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_); }); }); @@ -1453,8 +1453,8 @@ inline sycl::event spr(const char *func_name, Func 
func, sycl::queue &queue, upl } #define SPR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -1465,24 +1465,24 @@ SPR_LAUNCHER_USM(double, cublasDspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1490,9 +1490,9 @@ inline 
sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, up } #define SPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -1503,21 +1503,21 @@ SPR2_LAUNCHER_USM(double, cublasDspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1528,9 +1528,9 @@ 
inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, up } #define TBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -1543,21 +1543,21 @@ TBMV_LAUNCHER_USM(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), 
get_cublas_operation(trans), @@ -1568,9 +1568,9 @@ inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, up } #define TBSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -1583,20 +1583,20 @@ TBSV_LAUNCHER_USM(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), 
get_cublas_operation(trans), @@ -1607,9 +1607,9 @@ inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, up } #define TPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -1622,20 +1622,20 @@ TPMV_LAUNCHER_USM(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1646,9 +1646,9 @@ inline 
sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, up } #define TPSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -1661,20 +1661,20 @@ TPSV_LAUNCHER_USM(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1685,9 +1685,9 @@ inline sycl::event trmv(const char 
*func_name, Func func, sycl::queue &queue, up } #define TRMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -1700,20 +1700,20 @@ TRMV_LAUNCHER_USM(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1724,9 +1724,9 @@ inline sycl::event trsv(const char 
*func_name, Func func, sycl::queue &queue, up } #define TRSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -1744,16 +1744,16 @@ namespace row_major { // Buffer APIs template -inline void gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "gemv", "for row_major layout"); } #define GEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy); \ } @@ -1765,16 +1765,16 @@ GEMV_LAUNCHER(std::complex, cublasZgemv) #undef GEMV_LAUNCHER template -inline void gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, T alpha, 
sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "gbmv", "for row_major layout"); } #define GBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1786,15 +1786,15 @@ GBMV_LAUNCHER(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER template -inline void ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "ger", "for row_major layout"); } #define GER_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, 
n, alpha, x, incx, y, incy, a, lda); \ } @@ -1808,16 +1808,16 @@ GER_LAUNCHER(c, std::complex, cublasZgerc) #undef GER_LAUNCHER template -inline void hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hbmv", "for row_major layout"); } #define HBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1827,16 +1827,16 @@ HBMV_LAUNCHER(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER template -inline void hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hemv", "for row_major layout"); } #define HEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, 
sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -1846,15 +1846,15 @@ HEMV_LAUNCHER(std::complex, cublasZhemv) #undef HEMV_LAUNCHER template -inline void her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a, int64_t lda) { +inline void her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "her", "for row_major layout"); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -1865,16 +1865,16 @@ HER_LAUNCHER(double, std::complex, cublasZher) #undef HER_LAUNCHER template -inline void her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "her2", "for row_major layout"); } #define HER2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - 
sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -1885,16 +1885,16 @@ HER2_LAUNCHER(std::complex, cublasZher2) #undef HER2_LAUNCHER template -inline void hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hpmv", "for row_major layout"); } #define HPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -1905,15 +1905,15 @@ HPMV_LAUNCHER(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER template -inline void hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a) { +inline void hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a) { throw unimplemented("blas", "hpr", "for row_major layout"); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, 
CUBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -1923,16 +1923,16 @@ HPR_LAUNCHER(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER template -inline void hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { throw unimplemented("blas", "hpr2", "for row_major layout"); } #define HPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -1942,16 +1942,16 @@ HPR2_LAUNCHER(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "sbmv", "for row_major 
layout"); } #define SBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1962,16 +1962,16 @@ SBMV_LAUNCHER(double, cublasDsbmv) #undef SBMV_LAUNCHER template -inline void symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "symv", "for row_major layout"); } #define SYMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -1982,14 +1982,14 @@ SYMV_LAUNCHER(double, cublasDsymv) #undef SYMV_LAUNCHER template -inline void syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(const char* func_name, Func func, 
sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "syr", "for row_major layout"); } #define SYR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2001,16 +2001,16 @@ SYR_LAUNCHER(std::complex, cublasZsyr) #undef SYR_LAUNCHER template -inline void syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "syr2", "for row_major layout"); } #define SYR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -2024,16 +2024,16 @@ SYR2_LAUNCHER(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER template -inline void spmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(const 
char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "spmv", "for row_major layout"); } #define SPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -2044,14 +2044,14 @@ SPMV_LAUNCHER(double, cublasDspmv) #undef SPMV_LAUNCHER template -inline void spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a) { throw unimplemented("blas", "spr", "for row_major layout"); } #define SPR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2061,16 +2061,16 @@ SPR_LAUNCHER(double, cublasDspr) #undef SPR_LAUNCHER template -inline void spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, 
sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { throw unimplemented("blas", "spr2", "for row_major layout"); } #define SPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2080,15 +2080,15 @@ SPR2_LAUNCHER(double, cublasDspr2) #undef SPR2_LAUNCHER template -inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tbmv", "for row_major layout"); } #define TBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -2102,15 +2102,15 @@ TBMV_LAUNCHER(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER template -inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void 
tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tbsv", "for row_major layout"); } #define TBSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -2124,15 +2124,15 @@ TBSV_LAUNCHER(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER template -inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tpmv", "for row_major layout"); } #define TPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -2145,15 +2145,15 @@ TPMV_LAUNCHER(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER template -inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t 
incx) { +inline void tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tpsv", "for row_major layout"); } #define TPSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -2166,15 +2166,15 @@ TPSV_LAUNCHER(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER template -inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "trmv", "for row_major layout"); } #define TRMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -2187,15 +2187,15 @@ TRMV_LAUNCHER(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER template -inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, 
- sycl::buffer &x, int64_t incx) { +inline void trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "trsv", "for row_major layout"); } #define TRSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -2210,17 +2210,17 @@ TRSV_LAUNCHER(std::complex, cublasZtrsv) // USM APIs template -inline sycl::event gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gemv", "for row_major layout"); } #define GEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, 
x, incx, \ beta, y, incy, dependencies); \ } @@ -2232,18 +2232,18 @@ GEMV_LAUNCHER_USM(std::complex, cublasZgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T *a, - int64_t lda, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T* a, + int64_t lda, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gbmv", "for row_major layout"); } #define GBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2255,16 +2255,16 @@ GBMV_LAUNCHER_USM(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, - T alpha, const T *x, int64_t incx, const T *y, int64_t incy, T *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, + T alpha, const T* x, int64_t incx, const T* y, int64_t incy, T* a, + int64_t lda, const std::vector& dependencies) { throw 
unimplemented("blas", "ger", "for row_major layout"); } #define GER_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2278,17 +2278,17 @@ GER_LAUNCHER_USM(c, std::complex, cublasZgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hbmv", "for row_major layout"); } #define HBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2298,16 +2298,16 @@ HBMV_LAUNCHER_USM(std::complex, cublasZhbmv) #undef 
HBMV_LAUNCHER_USM template -inline sycl::event hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "hemv", "for row_major layout"); } #define HEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2317,16 +2317,16 @@ HEMV_LAUNCHER_USM(std::complex, cublasZhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her", "for row_major layout"); } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE 
*a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -2337,16 +2337,16 @@ HER_LAUNCHER_USM(double, std::complex, cublasZher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her2", "for row_major layout"); } #define HER2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -2357,16 +2357,16 @@ HER2_LAUNCHER_USM(std::complex, cublasZher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + 
int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "hpmv", "for row_major layout"); } #define HPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -2377,16 +2377,16 @@ HPMV_LAUNCHER_USM(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, const std::vector &dependencies) { +inline sycl::event hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, const std::vector& dependencies) { throw unimplemented("blas", "hpr", "for row_major layout"); } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -2397,16 +2397,16 @@ HPR_LAUNCHER_USM(double, std::complex, cublasZhpr) #undef 
HPR_LAUNCHER_USM template -inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { throw unimplemented("blas", "hpr2", "for row_major layout"); } #define HPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -2417,17 +2417,17 @@ HPR2_LAUNCHER_USM(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "sbmv", "for row_major layout"); } #define SBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector 
&dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2438,16 +2438,16 @@ SBMV_LAUNCHER_USM(double, cublasDsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "symv", "for row_major layout"); } #define SYMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2458,16 +2458,16 @@ SYMV_LAUNCHER_USM(double, cublasDsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(const char* func_name, Func func, sycl::queue& queue, uplo 
upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "syr", "for row_major layout"); } #define SYR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -2480,16 +2480,16 @@ SYR_LAUNCHER_USM(std::complex, cublasZsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "syr2", "for row_major layout"); } #define SYR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -2503,16 +2503,16 @@ SYR2_LAUNCHER_USM(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER_USM template -inline 
sycl::event spmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "spmv", "for row_major layout"); } #define SPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -2523,15 +2523,15 @@ SPMV_LAUNCHER_USM(double, cublasDspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, + const std::vector& dependencies) { throw unimplemented("blas", "spr", "for row_major layout"); } #define SPR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return 
spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -2542,16 +2542,16 @@ SPR_LAUNCHER_USM(double, cublasDspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { throw unimplemented("blas", "spr2", "for row_major layout"); } #define SPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -2562,17 +2562,17 @@ SPR2_LAUNCHER_USM(double, cublasDspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tbmv", "for row_major layout"); } #define TBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, 
diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -2585,17 +2585,17 @@ TBMV_LAUNCHER_USM(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tbsv", "for row_major layout"); } #define TBSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -2608,16 +2608,16 @@ TBSV_LAUNCHER_USM(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const 
std::vector &dependencies) { +inline sycl::event tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpmv", "for row_major layout"); } #define TPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -2630,16 +2630,16 @@ TPMV_LAUNCHER_USM(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpsv", "for row_major layout"); } #define TPSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -2652,16 +2652,16 @@ 
TPSV_LAUNCHER_USM(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "trmv", "for row_major layout"); } #define TRMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -2674,16 +2674,16 @@ TRMV_LAUNCHER_USM(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "trsv", "for row_major layout"); } #define TRSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const 
std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_level3.cpp b/src/blas/backends/cublas/cublas_level3.cpp index ec6dc5192..58d1f4273 100644 --- a/src/blas/backends/cublas/cublas_level3.cpp +++ b/src/blas/backends/cublas/cublas_level3.cpp @@ -31,33 +31,33 @@ namespace column_major { // Buffer APIs template -inline void gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, - a_, 
lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_operation(transb), m, n, k, (cuDataType*)&alpha, a_, + lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); } #define GEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ @@ -72,15 +72,15 @@ GEMM_LAUNCHER(std::complex, cublasZgemm) template -inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue &queue, +inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, T_C alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - T_C beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + T_C beta, sycl::buffer& c, int64_t ldc) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; using cuDataType_C = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::math::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -88,17 +88,17 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - 
onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, - ldc, DT_C, CUBLAS_GEMM_DEFAULT); + get_cublas_operation(transb), m, n, k, (cuDataType_C*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (cuDataType_C*)&beta, c_, DT_C, ldc, + DT_C, CUBLAS_GEMM_DEFAULT); #else CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, @@ -110,9 +110,9 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_C beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_C beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, m, n, k, \ alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -123,40 +123,40 @@ GEMM_EX_LAUNCHER(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, CUD #undef GEMM_EX_LAUNCHER -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - 
float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for column_major layout"); } template -inline void symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); } #define SYMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, 
uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -169,33 +169,33 @@ SYMM_LAUNCHER(std::complex, cublasZsymm) #undef SYMM_LAUNCHER template -inline void hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, 
n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); } #define HEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -205,31 +205,31 @@ HEMM_LAUNCHER(std::complex, cublasZhemm) #undef HEMM_LAUNCHER template -inline void syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &c, int64_t ldc) { +inline void syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, (cuDataType *)&beta, c_, + n, k, (cuDataType*)&alpha, a_, lda, 
(cuDataType*)&beta, c_, ldc); }); }); } #define SYRK_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -242,33 +242,33 @@ SYRK_LAUNCHER(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER template -inline void herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, ScalarType alpha, - sycl::buffer &a, int64_t lda, ScalarType beta, - sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, ScalarType beta, + sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, - c_, ldc); + n, k, (cuScalarType*)&alpha, a_, lda, (cuScalarType*)&beta, c_, + ldc); }); }); } #define 
HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -279,34 +279,34 @@ HERK_LAUNCHER(std::complex, double, cublasZherk) #undef HERK_LAUNCHER template -inline void syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - 
(cuDataType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuDataType*)&beta, c_, ldc); }); }); } #define SYR2K_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -318,36 +318,36 @@ SYR2K_LAUNCHER(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, DataType alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, ScalarType beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, ScalarType beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); 
cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuScalarType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuScalarType*)&beta, c_, ldc); }); }); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -362,31 +362,31 @@ HER2K_LAUNCHER(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. 
template -inline void trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); }); }); } #define TRMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -398,31 +398,31 @@ TRMM_LAUNCHER(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER 
template -inline void trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb); }); }); } #define TRSM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -436,36 +436,36 @@ TRSM_LAUNCHER(std::complex, cublasZtrsm) // USM APIs template -inline sycl::event 
gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T *a, - int64_t lda, const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T* a, + int64_t lda, const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_operation(transb), m, n, k, (cuDataType*)&alpha, a_, + lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define GEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ 
+ int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -480,30 +480,30 @@ GEMM_LAUNCHER_USM(std::complex, cublasZgemm) template inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T_C alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_C beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T_C alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_C beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; using cuDataType_C = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, 
DT_C, - ldc, DT_C, CUBLAS_GEMM_DEFAULT); + get_cublas_operation(transb), m, n, k, (cuDataType_C*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (cuDataType_C*)&beta, c_, DT_C, ldc, + DT_C, CUBLAS_GEMM_DEFAULT); #else CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, @@ -517,10 +517,10 @@ inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, \ CUDADATATYPE_C) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_C beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_C beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex_usm(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, \ m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -530,44 +530,44 @@ GEMM_EX_LAUNCHER_USM(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, #undef GEMM_EX_LAUNCHER_USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for column_major layout"); } template -inline sycl::event 
symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define SYMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, 
int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -580,36 +580,36 @@ SYMM_LAUNCHER_USM(std::complex, cublasZsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define HEMM_LAUNCHER_USM(TYPE, 
CUBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -619,24 +619,24 @@ HEMM_LAUNCHER_USM(std::complex, cublasZhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - 
n, k, (cuDataType *)&alpha, a_, lda, (cuDataType *)&beta, c_, + n, k, (cuDataType*)&alpha, a_, lda, (cuDataType*)&beta, c_, ldc); }); }); @@ -644,9 +644,9 @@ inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, up } #define SYRK_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -659,37 +659,37 @@ SYRK_LAUNCHER_USM(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const ScalarType alpha, - const DataType *a, int64_t lda, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { + const DataType* a, int64_t lda, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, - c_, ldc); + n, k, (cuScalarType*)&alpha, a_, lda, (cuScalarType*)&beta, c_, + ldc); }); }); return done; } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -700,37 +700,37 @@ HERK_LAUNCHER_USM(std::complex, double, cublasZherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& 
cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuDataType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuDataType*)&beta, c_, ldc); }); }); return done; } #define SYR2K_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -742,39 +742,39 @@ SYR2K_LAUNCHER_USM(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const DataType alpha, - const DataType *a, int64_t lda, const DataType *b, int64_t ldb, - const ScalarType beta, DataType *c, int64_t ldc, - 
const std::vector &dependencies) { + const DataType* a, int64_t lda, const DataType* b, int64_t ldb, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuScalarType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuScalarType*)&beta, c_, ldc); }); }); return done; } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, 
alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -789,35 +789,35 @@ HER2K_LAUNCHER_USM(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); }); }); return done; } #define TRMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { 
\ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -829,35 +829,35 @@ TRMM_LAUNCHER_USM(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemath_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb); }); }); return done; } #define TRSM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event 
trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -874,17 +874,17 @@ namespace row_major { // Buffer APIs template -inline void gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ @@ -899,17 +899,17 @@ GEMM_LAUNCHER(std::complex, cublasZgemm) template -inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue &queue, +inline 
void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, T_C alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - T_C beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + T_C beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_C beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_C beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, m, n, k, \ alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -920,24 +920,24 @@ GEMM_EX_LAUNCHER(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, CUD #undef GEMM_EX_LAUNCHER -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } template -inline void symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void 
symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "symm", "for row_major layout"); } #define SYMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -950,17 +950,17 @@ SYMM_LAUNCHER(std::complex, cublasZsymm) #undef SYMM_LAUNCHER template -inline void hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "hemm", "for row_major layout"); } #define HEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ 
hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -970,16 +970,16 @@ HEMM_LAUNCHER(std::complex, cublasZhemm) #undef HEMM_LAUNCHER template -inline void syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &c, int64_t ldc) { +inline void syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "syrk", "for row_major layout"); } #define SYRK_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -992,17 +992,17 @@ SYRK_LAUNCHER(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER template -inline void herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, ScalarType alpha, - sycl::buffer &a, int64_t lda, ScalarType beta, - sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, ScalarType beta, + sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "herk", "for row_major layout"); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE 
beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -1013,17 +1013,17 @@ HERK_LAUNCHER(std::complex, double, cublasZherk) #undef HERK_LAUNCHER template -inline void syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "syr2k", "for row_major layout"); } #define SYR2K_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -1035,18 +1035,18 @@ SYR2K_LAUNCHER(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, DataType alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, ScalarType beta, 
sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, ScalarType beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "her2k", "for row_major layout"); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -1061,16 +1061,16 @@ HER2K_LAUNCHER(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. 
template -inline void trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { throw unimplemented("blas", "trmm", "for row_major layout"); } #define TRMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -1082,16 +1082,16 @@ TRMM_LAUNCHER(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER template -inline void trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { throw unimplemented("blas", "trsm", "for row_major layout"); } #define TRSM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + 
int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -1105,18 +1105,18 @@ TRSM_LAUNCHER(std::complex, cublasZtrsm) // USM APIs template -inline sycl::event gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T *a, - int64_t lda, const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T* a, + int64_t lda, const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1131,19 +1131,19 @@ GEMM_LAUNCHER_USM(std::complex, cublasZgemm) template inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T_C alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_C beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + sycl::queue& queue, transpose transa, 
transpose transb, int64_t m, + int64_t n, int64_t k, T_C alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_C beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, \ CUDADATATYPE_C) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_C beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_C beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex_usm(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, \ m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1153,26 +1153,26 @@ GEMM_EX_LAUNCHER_USM(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, #undef GEMM_EX_LAUNCHER_USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } template -inline sycl::event symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, 
- const std::vector &dependencies) { +inline sycl::event symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "symm", "for row_major layout"); } #define SYMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1185,18 +1185,18 @@ SYMM_LAUNCHER_USM(std::complex, cublasZsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "hemm", "for row_major layout"); } #define HEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - 
const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1206,16 +1206,16 @@ HEMM_LAUNCHER_USM(std::complex, cublasZhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "syrk", "for row_major layout"); } #define SYRK_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -1228,18 +1228,18 @@ SYRK_LAUNCHER_USM(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, 
int64_t k, const ScalarType alpha, - const DataType *a, int64_t lda, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { + const DataType* a, int64_t lda, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "herk", "for row_major layout"); } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -1250,18 +1250,18 @@ HERK_LAUNCHER_USM(std::complex, double, cublasZherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syr2k", "for row_major layout"); } #define SYR2K_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t 
ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1273,19 +1273,19 @@ SYR2K_LAUNCHER_USM(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const DataType alpha, - const DataType *a, int64_t lda, const DataType *b, int64_t ldb, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { + const DataType* a, int64_t lda, const DataType* b, int64_t ldb, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "her2k", "for row_major layout"); } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1300,17 +1300,17 @@ HER2K_LAUNCHER_USM(std::complex, 
double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trmm", "for row_major layout"); } #define TRMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -1322,17 +1322,17 @@ TRMM_LAUNCHER_USM(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trsm", "for row_major layout"); } #define TRSM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event 
trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_scope_handle.cpp b/src/blas/backends/cublas/cublas_scope_handle.cpp index b83185545..013d24d92 100644 --- a/src/blas/backends/cublas/cublas_scope_handle.cpp +++ b/src/blas/backends/cublas/cublas_scope_handle.cpp @@ -43,7 +43,7 @@ thread_local cublas_handle CublasScopedContextHandler::handle_helper cublas_handle{}; #endif -CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih) +CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih) : ih(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -73,8 +73,8 @@ CublasScopedContextHandler::~CublasScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -92,7 +92,7 @@ void ContextCallback(void *userData) { } } -cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) { +cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue& queue) { auto cudaDevice = ih.get_native_device(); CUresult cuErr; CUcontext desired; @@ -139,10 +139,10 @@ cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -CUstream 
CublasScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CublasScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context CublasScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context CublasScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/blas/backends/cublas/cublas_scope_handle.hpp b/src/blas/backends/cublas/cublas_scope_handle.hpp index 3e0b4a8af..968286ed6 100644 --- a/src/blas/backends/cublas/cublas_scope_handle.hpp +++ b/src/blas/backends/cublas/cublas_scope_handle.hpp @@ -85,19 +85,19 @@ the handle must be destroyed when the context goes out of scope. This will bind class CublasScopedContextHandler { CUcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMATH_PI_INTERFACE_REMOVED static thread_local cublas_handle handle_helper; #else static thread_local cublas_handle handle_helper; #endif - CUstream get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + CUstream get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~CublasScopedContextHandler() noexcept(false); /** @@ -107,7 +107,7 @@ class CublasScopedContextHandler { * @param queue sycl queue. * @return cublasHandle_t a handle to construct cublas routines */ - cublasHandle_t get_handle(const sycl::queue &queue); + cublasHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
template @@ -116,7 +116,7 @@ class CublasScopedContextHandler { return reinterpret_cast(cudaPtr); } - void wait_stream(const sycl::queue &queue) { + void wait_stream(const sycl::queue& queue) { cuStreamSynchronize(get_stream(queue)); } }; diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp index 82870f0ae..de1870d28 100644 --- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp +++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp @@ -26,10 +26,10 @@ namespace cublas { thread_local cublas_handle CublasScopedContextHandler::handle_helper = cublas_handle{}; -CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih) +CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih) : interop_h(ih) {} -cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) { +cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue& queue) { sycl::device device = queue.get_device(); int current_device = interop_h.get_native_device(); CUstream streamId = get_stream(queue); @@ -64,7 +64,7 @@ cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -CUstream CublasScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CublasScopedContextHandler::get_stream(const sycl::queue& queue) { return interop_h.get_native_queue(); } diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp index bc358d9ca..28354a265 100644 --- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp +++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp @@ -61,13 +61,13 @@ the handle must be destroyed when the context goes out of scope. 
This will bind class CublasScopedContextHandler { sycl::interop_handle interop_h; static thread_local cublas_handle handle_helper; - sycl::context get_context(const sycl::queue &queue); - CUstream get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue& queue); + CUstream get_stream(const sycl::queue& queue); public: - CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); - cublasHandle_t get_handle(const sycl::queue &queue); + cublasHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. diff --git a/src/blas/backends/cublas/cublas_task.hpp b/src/blas/backends/cublas/cublas_task.hpp index 58d5306ff..1b5173052 100644 --- a/src/blas/backends/cublas/cublas_task.hpp +++ b/src/blas/backends/cublas/cublas_task.hpp @@ -58,7 +58,7 @@ namespace cublas { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { cgh.hipSYCL_enqueue_custom_operation([f, queue](sycl::interop_handle ih) { auto sc = CublasScopedContextHandler(queue, ih); f(sc); @@ -66,9 +66,9 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #else template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -78,7 +78,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #endif template -static inline void onemath_cublas_host_task(H 
&cgh, sycl::queue queue, F f) { +static inline void onemath_cublas_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/blas/backends/mkl_common/mkl_batch.cxx b/src/blas/backends/mkl_common/mkl_batch.cxx index 93bba84a1..cb97b9693 100644 --- a/src/blas/backends/mkl_common/mkl_batch.cxx +++ b/src/blas/backends/mkl_common/mkl_batch.cxx @@ -19,1015 +19,1142 @@ // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - std::int64_t stridex, sycl::buffer &y, int64_t incy, std::int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + std::int64_t stridex, sycl::buffer& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - std::int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + std::int64_t stridex, sycl::buffer& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + 
blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size)); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t 
n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size)); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, - incy, stride_y, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, + n, alpha, a, lda, stride_a, x, incx, stride_x, + beta, y, incy, stride_y, batch_size)); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - 
sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, - incy, stride_y, batch_size)); +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, + n, alpha, a, lda, stride_a, x, incx, stride_x, + beta, y, incy, stride_y, batch_size)); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, - incy, stride_y, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, + n, alpha, a, lda, stride_a, x, incx, stride_x, + beta, y, incy, stride_y, batch_size)); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, 
transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, - incy, stride_y, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, + n, alpha, a, lda, stride_a, x, incx, stride_x, + beta, y, incy, stride_y, batch_size)); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, + n, a, lda, stride_a, x, incx, stride_x, c, ldc, + stride_c, batch_size)); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, - stride_c, batch_size)); +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + 
sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, + n, a, lda, stride_a, x, incx, stride_x, c, ldc, + stride_c, batch_size)); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, + n, a, lda, stride_a, x, incx, stride_x, c, ldc, + stride_c, batch_size)); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, + n, a, lda, stride_a, x, incx, 
stride_x, c, ldc, + stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, double beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + 
queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, - int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, 
ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, 
detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "gemm_batch", "unsupported dtype combination: int8_t, int8_t, float, float"); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), 
detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size)); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - stride_a, b, ldb, stride_b, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size)); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - stride_a, b, ldb, stride_b, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch( + queue, 
detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size)); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - stride_a, b, ldb, stride_b, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size)); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - stride_a, b, ldb, stride_b, batch_size)); + 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size)); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, - stride_c, batch_size)); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, 
stride_a, beta, c, ldc, stride_c, batch_size)); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, ldc, - stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size)); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t 
stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size)); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size)); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size)); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size)); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, 
detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size)); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, stride_a, b, ldb, + stride_b, batch_size)); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch( + queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); + 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch( + queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch( + queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy_batch( + queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size)); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size)); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + 
float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size)); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size)); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size)); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 
1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size)); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, ldb, - stride_b, c, ldc, stride_c, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size)); } // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + 
std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const 
std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event copy_batch(sycl::queue& 
queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies)); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, 
stridey, batch_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, + stridey, batch_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t 
*incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, + group_count, group_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, + group_count, group_size, dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, + group_count, group_size, 
dependencies)); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, - dependencies)); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, + group_count, group_size, dependencies)); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, - beta, y, incy, stride_y, batch_size, dependencies)); +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch( + queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, + stride_x, beta, y, incy, stride_y, batch_size, dependencies)); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double beta, double 
*y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, - beta, y, incy, stride_y, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch( + queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, + stride_x, beta, y, incy, stride_y, batch_size, dependencies)); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, - beta, y, incy, stride_y, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch( + queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, + stride_x, beta, y, incy, stride_y, batch_size, dependencies)); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, 
const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, stride_x, - beta, y, incy, stride_y, batch_size, dependencies)); -} - -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, incx, beta, y, incy, - group_count, groupsize, dependencies)); -} - -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, incx, beta, y, incy, - group_count, groupsize, dependencies)); -} - -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, 
detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, incx, beta, y, incy, - group_count, groupsize, dependencies)); -} - -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, incx, beta, y, incy, - group_count, groupsize, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), 
m, n, a, lda, stride_a, x, incx, stride_x, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, x, incx, stride_x, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, x, incx, c, ldc, group_count, - groupsize, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, x, incx, c, ldc, group_count, - groupsize, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, x, incx, c, ldc, group_count, - groupsize, dependencies)); -} - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t 
*n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, x, incx, c, ldc, group_count, - groupsize, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *b, int64_t ldb, int64_t stride_b, float beta, float *c, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv_batch( + queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, stride_a, x, incx, + stride_x, beta, y, incy, stride_y, batch_size, dependencies)); +} + +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, + incx, beta, y, incy, group_count, groupsize, dependencies)); +} + +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, + incx, beta, y, incy, group_count, groupsize, dependencies)); +} + +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const 
std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, + incx, beta, y, incy, group_count, groupsize, dependencies)); +} + +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gemv_batch(queue, detail::get_onemkl_transpose(transa), m, n, alpha, a, lda, x, + incx, beta, y, incy, group_count, groupsize, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, + x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, + x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* 
x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, + x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), m, n, a, lda, stride_a, + x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), + m, n, a, lda, x, incx, c, ldc, group_count, + groupsize, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), + m, n, a, lda, x, incx, c, ldc, group_count, + groupsize, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), + m, n, a, lda, x, incx, c, ldc, group_count, + groupsize, dependencies)); +} + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dgmm_batch(queue, detail::get_onemkl_side(left_right), + m, n, a, lda, x, incx, c, ldc, group_count, + groupsize, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *b, int64_t ldb, int64_t stride_b, double beta, double *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* 
c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, 
const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half 
alpha, const sycl::half* a, int64_t lda, + int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { throw unimplemented("blas", "gemm_batch", "unsupported dtype combination: int8_t, int8_t, float, float"); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, stride_a, b, ldb, - stride_b, beta, c, ldc, stride_c, batch_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, group_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, group_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, 
int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, group_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, group_size, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, groupsize, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, 
detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, groupsize, dependencies)); -} - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, + dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event 
gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", "unsupported dtype combination: int8_t, int8_t, float, float"); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, - ldc, group_count, groupsize, dependencies)); +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_batch( + queue, detail::get_onemkl_transpose(transa), 
detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies)); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag 
unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t 
stride_b, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); -} - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, - int64_t *lda, float **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, b, ldb, group_count, groupsize, dependencies)); -} - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, - int64_t *lda, double **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, b, ldb, group_count, groupsize, dependencies)); -} - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), 
detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, b, ldb, group_count, groupsize, dependencies)); -} - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, - lda, b, ldb, group_count, groupsize, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, + const std::complex* a, 
int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, stride_a, b, ldb, stride_b, batch_size, dependencies)); +} + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, group_count, groupsize, dependencies)); +} + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, group_count, groupsize, dependencies)); +} + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, 
detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, group_count, groupsize, dependencies)); +} + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm_batch( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, group_count, groupsize, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t 
lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies)); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, stride_a, beta, c, - ldc, stride_c, batch_size, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - group_count, groupsize, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const 
double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - group_count, groupsize, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - group_count, groupsize, dependencies)); -} - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - group_count, groupsize, dependencies)); -} - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, 
float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk_batch( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies)); +} + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, 
int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size, dependencies)); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, 
int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, stride_a, b, ldb, stride_b, - batch_size, dependencies)); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + stride_a, b, ldb, stride_b, batch_size, dependencies)); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size, - dependencies)); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, stride, batch_size, dependencies)); } -sycl::event imatcopy_batch(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size, - dependencies)); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, stride, batch_size, dependencies)); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size, - dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, stride, batch_size, dependencies)); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, stride, batch_size, - dependencies)); 
+ const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, stride, batch_size, dependencies)); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, - ldb, stride_b, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), 
detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, - ldb, stride_b, c, ldc, stride_c, batch_size, dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, - ldb, stride_b, c, ldc, stride_c, batch_size, dependencies)); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, + dependencies)); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t 
stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, stride_a, beta, b, - ldb, stride_b, c, ldc, stride_c, batch_size, dependencies)); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd_batch( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, + dependencies)); } sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + b, ldb, group_count, groupsize, dependencies)); } sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + b, ldb, group_count, groupsize, dependencies)); } sycl::event omatcopy_batch(sycl::queue& 
queue, transpose* trans, int64_t* m, int64_t* n, std::complex* alpha, const std::complex** a, int64_t* lda, std::complex** b, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + b, ldb, group_count, groupsize, dependencies)); } sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, @@ -1035,38 +1162,43 @@ sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int int64_t* lda, std::complex** b, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::omatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, + b, ldb, group_count, groupsize, dependencies)); } sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, float** ab, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, group_count, groupsize, dependencies)); } sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, double* alpha, double** ab, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* groupsize, const 
std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, group_count, groupsize, dependencies)); } sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, std::complex* alpha, std::complex** ab, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, group_count, groupsize, dependencies)); } sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, std::complex* alpha, std::complex** ab, int64_t* lda, int64_t* ldb, int64_t group_count, int64_t* groupsize, const std::vector& dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, group_count, - groupsize, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::imatcopy_batch(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, + ldb, group_count, groupsize, dependencies)); } diff --git a/src/blas/backends/mkl_common/mkl_blas_backend.hxx b/src/blas/backends/mkl_common/mkl_blas_backend.hxx index 82588a66a..51937dade 100644 --- a/src/blas/backends/mkl_common/mkl_blas_backend.hxx +++ b/src/blas/backends/mkl_common/mkl_blas_backend.hxx @@ -19,1351 +19,1365 @@ /// level3, buffer -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, 
std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, 
std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo 
upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, 
std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, 
std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, 
transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); // level 3, USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, value_or_pointer beta, - double *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemm(sycl::queue& queue, transpose transa, 
transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, value_or_pointer beta, + double* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - value_or_pointer beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, 
const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + value_or_pointer beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, + value_or_pointer beta, bfloat16* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t 
m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, value_or_pointer beta, bfloat16 *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const std::int8_t *a, - std::int64_t lda, const std::int8_t *b, std::int64_t ldb, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const std::int8_t *a, - std::int64_t lda, const std::int8_t *b, std::int64_t ldb, - value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, value_or_pointer beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, value_or_pointer beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, + const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, 
std::int64_t lda, + const float* b, std::int64_t ldb, value_or_pointer beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, value_or_pointer beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, 
value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, std::int64_t lda, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, std::int64_t lda, - value_or_pointer beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, + value_or_pointer beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const std::complex *a, - std::int64_t lda, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const std::complex *a, - std::int64_t lda, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, value_or_pointer beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, value_or_pointer beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo 
upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const std::complex* a, + std::int64_t lda, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const std::complex* a, + std::int64_t lda, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, value_or_pointer beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, value_or_pointer beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> 
beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, 
std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); // level 2, buffer -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void gemv(sycl::queue& queue, 
transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, 
double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t 
lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, 
std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, 
std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, 
double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, 
double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, 
+ sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trsv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); // level 2, USM -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, value_or_pointer beta, - float *y, std::int64_t incy, const std::vector &dependencies 
= {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, value_or_pointer beta, - double *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, value_or_pointer beta, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, 
std::int64_t ku, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, value_or_pointer> beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, + const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, value_or_pointer> beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue 
&queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo 
upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t 
incx, const std::complex *y, std::int64_t incy, - std::complex *a, const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, - value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float 
*y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *a, const float *x, std::int64_t incx, value_or_pointer beta, - float *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *a, const double *x, std::int64_t incx, value_or_pointer beta, - double *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, float *a, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, 
const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, 
transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const 
std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& 
queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> 
alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const 
double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* a, const float* x, std::int64_t incx, + value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* a, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& 
queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, 
std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); // level 1, buffer -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, 
std::int64_t incy, + sycl::buffer, 1>& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, 
sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void 
axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void 
copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, 
- std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, double s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, 
sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, 
sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + 
std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); // level 1, USM -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies 
= {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, 
std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event 
axpy(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, value_or_pointer beta, double* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t 
incy, const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, 
std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const 
std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, value_or_pointer c, - value_or_pointer s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, value_or_pointer c, + value_or_pointer s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, value_or_pointer c, - value_or_pointer s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, value_or_pointer c, + value_or_pointer s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, std::int64_t incy, value_or_pointer c, value_or_pointer s, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, 
std::int64_t incx, double *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, value_or_pointer c, value_or_pointer s, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const float *param, - const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const float* param, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const double *param, - const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const double* param, + const std::vector& 
dependencies = {}); -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, value_or_pointer y1, - float *param, const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, value_or_pointer y1, + float* param, const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, value_or_pointer y1, - double *param, const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, + value_or_pointer y1, double* param, + const std::vector& dependencies = {}); -#define ONEMATH_DECLARE_SCAL(T, Ts) \ - sycl::event scal(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, T *x, \ - std::int64_t incx, const std::vector &dependencies = {}); +#define ONEMATH_DECLARE_SCAL(T, Ts) \ + sycl::event scal(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, T* x, \ + std::int64_t incx, const std::vector& dependencies = {}); ONEMATH_DECLARE_SCAL(float, float) ONEMATH_DECLARE_SCAL(double, double) @@ -1371,1124 +1385,1134 @@ ONEMATH_DECLARE_SCAL(std::complex, std::complex) ONEMATH_DECLARE_SCAL(std::complex, std::complex) ONEMATH_DECLARE_SCAL(std::complex, float) ONEMATH_DECLARE_SCAL(std::complex, double) -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); #undef ONEMATH_DECLARE_SCAL -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, 
const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); // extensions, buffer -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, 
transpose transb, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int8_t ao, - sycl::buffer &b, std::int64_t ldb, std::uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::int8_t ao, + sycl::buffer& b, std::int64_t ldb, std::uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void 
gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int8_t ao, - sycl::buffer &b, std::int64_t ldb, std::int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::int8_t ao, + sycl::buffer& b, std::int64_t ldb, std::int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::uint8_t ao, - sycl::buffer &b, std::int64_t ldb, std::int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::uint8_t ao, + sycl::buffer& b, std::int64_t ldb, std::int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::uint8_t ao, - sycl::buffer &b, std::int64_t ldb, std::uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::uint8_t ao, + sycl::buffer& b, std::int64_t ldb, std::uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); // extensions, USM -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector 
&dependencies = {}); +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, value_or_pointer beta, - double *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + value_or_pointer beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> 
beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, value_or_pointer beta, std::int32_t *c, - std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, value_or_pointer beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, value_or_pointer beta, std::int32_t *c, - std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, value_or_pointer beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, 
std::int64_t ldb, std::int8_t bo, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); // batch, buffer -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, 
int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, 
sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, 
std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, 
side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& 
a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, 
std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void 
gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - 
std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t 
stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, 
std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> 
&b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, 
std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, 
std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // batch, usm -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, const float *alpha, - const float **a, const std::int64_t *lda, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, const double *alpha, - const double **a, const std::int64_t *lda, const double *beta, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event 
syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, value_or_pointer beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, const float* alpha, + const float** a, const std::int64_t* lda, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, const double* alpha, + const double** a, const std::int64_t* lda, const double* beta, double** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); 
+sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + 
value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t 
stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const float **x, - const std::int64_t *incx, float **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const double **x, - const std::int64_t *incx, double **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex **x, - const std::int64_t *incx, std::complex **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex **x, - const std::int64_t *incx, std::complex **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const float** x, + const std::int64_t* incx, 
float** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const double** x, + const std::int64_t* incx, double** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex** x, + const std::int64_t* incx, std::complex** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex** x, + const std::int64_t* incx, std::complex** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t 
lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const float **a, const std::int64_t *lda, - const float **x, const std::int64_t *incx, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const double **a, const std::int64_t *lda, - const double **x, const std::int64_t *incx, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const std::complex **a, - const std::int64_t *lda, const std::complex **x, - const std::int64_t *incx, std::complex **c, const std::int64_t *ldc, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const std::complex **a, - const std::int64_t *lda, const std::complex **x, - const std::int64_t *incx, std::complex **c, const std::int64_t *ldc, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - 
-sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, value_or_pointer beta, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, value_or_pointer beta, double *y, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const float** a, const std::int64_t* lda, + const float** x, const std::int64_t* incx, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const double** a, const std::int64_t* lda, + const double** x, const std::int64_t* incx, double** c, + const std::int64_t* ldc, std::int64_t 
group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const std::complex** a, + const std::int64_t* lda, const std::complex** x, + const std::int64_t* incx, std::complex** c, const std::int64_t* ldc, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const std::complex** a, + const std::int64_t* lda, const std::complex** x, + const std::int64_t* incx, std::complex** c, const std::int64_t* ldc, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, value_or_pointer beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, value_or_pointer beta, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, 
std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, - value_or_pointer> beta, std::complex *y, + value_or_pointer> beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, - value_or_pointer> beta, std::complex *y, + value_or_pointer> beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, const float **x, const std::int64_t *incx, - const float *beta, float **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const double *alpha, const double **a, - const std::int64_t *lda, const double **x, const std::int64_t *incx, - const double *beta, double **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - const std::complex **x, const std::int64_t *incx, - const 
std::complex *beta, std::complex **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - const std::complex **x, const std::int64_t *incx, - const std::complex *beta, std::complex **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const double *alpha, - const double **x, const std::int64_t *incx, double **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const float *alpha, - const float **x, const std::int64_t *incx, float **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex *alpha, - const std::complex **x, const std::int64_t *incx, - std::complex **y, const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex *alpha, - const std::complex **x, const std::int64_t *incx, - std::complex **y, const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, 
const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + const std::int64_t* lda, const float** x, const std::int64_t* incx, + const float* beta, float** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, const double** x, const std::int64_t* incx, + const double* beta, double** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + const std::complex** x, const std::int64_t* incx, + const std::complex* beta, std::complex** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + const std::complex** x, const std::int64_t* incx, + const std::complex* beta, std::complex** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const double* alpha, + const double** x, const std::int64_t* incx, double** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const float* alpha, + const float** x, const std::int64_t* incx, float** y, + const std::int64_t* incy, 
std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex* alpha, + const std::complex** x, const std::int64_t* incx, + std::complex** y, const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex* alpha, + const std::complex** x, const std::int64_t* incx, + std::complex** y, const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, + value_or_pointer> alpha, const 
std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const float **a, const std::int64_t *lda, - const float **b, const std::int64_t *ldb, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const double *alpha, const double **a, const std::int64_t *lda, - const double **b, const std::int64_t *ldb, const double *beta, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex **b, - const std::int64_t *ldb, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex **b, - const std::int64_t *ldb, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - 
-sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const sycl::half *alpha, const sycl::half **a, const std::int64_t *lda, - const sycl::half **b, const std::int64_t *ldb, const sycl::half *beta, - sycl::half **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const sycl::half **a, const std::int64_t *lda, - const sycl::half **b, const std::int64_t *ldb, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const bfloat16 **a, const std::int64_t *lda, - const bfloat16 **b, const std::int64_t *ldb, const float *beta, - bfloat16 **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const bfloat16 **a, const std::int64_t *lda, - const bfloat16 **b, const std::int64_t *ldb, const float *beta, - float **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const std::int8_t **a, const 
std::int64_t *lda, - const std::int8_t **b, const std::int64_t *ldb, const float *beta, - std::int32_t **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const std::int8_t **a, const std::int64_t *lda, - const std::int8_t **b, const std::int64_t *ldb, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, value_or_pointer beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, value_or_pointer beta, double *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, 
const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const float** a, const std::int64_t* lda, + const float** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const double* alpha, const double** a, const std::int64_t* lda, + const double** b, const std::int64_t* ldb, const double* beta, double** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex** b, + const std::int64_t* ldb, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex** b, + const std::int64_t* ldb, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const sycl::half* alpha, const sycl::half** a, const std::int64_t* lda, + const sycl::half** 
b, const std::int64_t* ldb, const sycl::half* beta, + sycl::half** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const sycl::half** a, const std::int64_t* lda, + const sycl::half** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const bfloat16** a, const std::int64_t* lda, + const bfloat16** b, const std::int64_t* ldb, const float* beta, bfloat16** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const bfloat16** a, const std::int64_t* lda, + const bfloat16** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const std::int8_t** a, const std::int64_t* lda, + const std::int8_t** b, const std::int64_t* ldb, const float* beta, + std::int32_t** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event 
gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const std::int8_t** a, const std::int64_t* lda, + const std::int8_t** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, const float* b, + std::int64_t ldb, std::int64_t stride_b, value_or_pointer beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, value_or_pointer beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, 
std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, sycl::half *c, std::int64_t ldc, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, 
+ const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, std::int64_t stride_a, - const bfloat16 *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, bfloat16 *c, std::int64_t ldc, + const bfloat16* a, std::int64_t lda, std::int64_t stride_a, + const bfloat16* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, bfloat16* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, std::int64_t stride_a, - const bfloat16 *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + const bfloat16* a, std::int64_t lda, std::int64_t stride_a, + const bfloat16* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, 
std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event 
trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag 
*unit_diag, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, float **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const double *alpha, const double **a, - const std::int64_t *lda, double **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + 
const std::int64_t* lda, float** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, double** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, 
transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, float *ab, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const 
std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, double *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const float *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = 
{}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event 
omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, 
std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, +sycl::event omatcopy2(sycl::queue& queue, transpose trans, 
std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, 
const float *a, std::int64_t lda, - value_or_pointer beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, std::int64_t lda, + value_or_pointer beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, std::int64_t lda, - value_or_pointer beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, + std::int64_t lda, value_or_pointer beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, const std::complex *b, - std::int64_t 
ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, float **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const double *alpha, const double **a, - const std::int64_t *lda, double **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, float **ab, - const std::int64_t *lda, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const double *alpha, double **ab, - const std::int64_t *lda, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const 
std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - std::complex **ab, const std::int64_t *lda, - const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - std::complex **ab, const std::int64_t *lda, - const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + const std::int64_t* lda, float** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, double** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* 
alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const float* alpha, float** ab, + const std::int64_t* lda, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, double** ab, + const std::int64_t* lda, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + std::complex** ab, const std::int64_t* lda, + const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + std::complex** ab, const std::int64_t* lda, + const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); diff --git a/src/blas/backends/mkl_common/mkl_extensions.cxx b/src/blas/backends/mkl_common/mkl_extensions.cxx index 28500b63c..bca426982 100644 --- a/src/blas/backends/mkl_common/mkl_extensions.cxx +++ b/src/blas/backends/mkl_common/mkl_extensions.cxx @@ -19,341 +19,397 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer 
&c, int64_t ldc, sycl::buffer &co) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, - beta, c, ldc, co)); -} - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, - beta, c, ldc, co)); -} - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, - beta, c, ldc, co)); -} - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, - beta, c, ldc, co)); -} - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - 
sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co)); +} + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co)); +} + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co)); +} + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm_bias( + queue, 
detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co)); +} + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, - ldc)); +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, - ldc)); +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb)); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + 
sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, b, ldb)); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb)); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, b, ldb)); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb)); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, b, ldb)); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb)); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, b, ldb)); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, 
float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb)); 
+void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, ab, lda, ldb)); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb)); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, ab, lda, ldb)); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb)); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, ab, lda, ldb)); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb)); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, ab, lda, ldb)); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, 
float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc)); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, alpha, + a, lda, beta, b, ldb, c, ldc)); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc)); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, alpha, + a, lda, beta, b, ldb, c, ldc)); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc)); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, 
+ std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, alpha, + a, lda, beta, b, ldb, c, ldc)); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc)); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, alpha, + a, lda, beta, b, ldb, c, ldc)); } // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, - bo, beta, c, ldc, co, dependencies)); -} - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const 
std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, - bo, beta, c, ldc, co, dependencies)); -} - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, - bo, beta, c, ldc, co, dependencies)); -} - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, - bo, beta, c, ldc, co, dependencies)); -} - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies)); -} - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, 
transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies)); -} - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies)); -} - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, - c, ldc, dependencies)); -} - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, dependencies)); -} - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t 
ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, dependencies)); -} - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, dependencies)); -} - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, b, ldb, dependencies)); -} - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co, dependencies)); +} + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + 
const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co, dependencies)); +} + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co, dependencies)); +} + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm_bias( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), + detail::get_onemkl_offset(offsetc), m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, + co, dependencies)); +} + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies)); +} + +sycl::event gemmt(sycl::queue& 
queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies)); +} + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies)); +} + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemmt( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), n, k, alpha, a, lda, b, ldb, beta, c, ldc, + dependencies)); +} + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, b, ldb, dependencies)); +} + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const 
double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, b, ldb, dependencies)); +} + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, b, ldb, dependencies)); +} + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, a, lda, b, ldb, dependencies)); +} + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { 
+sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, dependencies)); +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, ab, lda, ldb, dependencies)); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, dependencies)); +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, ab, lda, ldb, dependencies)); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, dependencies)); +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, ab, lda, ldb, dependencies)); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), m, n, alpha, ab, lda, ldb, dependencies)); +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::imatcopy(queue, detail::get_onemkl_transpose(trans), + m, n, alpha, ab, lda, ldb, dependencies)); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies)); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float 
alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, beta, b, ldb, c, ldc, dependencies)); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies)); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, beta, b, ldb, c, ldc, dependencies)); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies)); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) 
{ + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, beta, b, ldb, c, ldc, dependencies)); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, alpha, a, lda, beta, b, ldb, c, ldc, - dependencies)); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::omatadd( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, + alpha, a, lda, beta, b, ldb, c, ldc, dependencies)); } diff --git a/src/blas/backends/mkl_common/mkl_level1.cxx b/src/blas/backends/mkl_common/mkl_level1.cxx index b6a985826..fac39ad36 100644 --- a/src/blas/backends/mkl_common/mkl_level1.cxx +++ b/src/blas/backends/mkl_common/mkl_level1.cxx @@ -19,627 +19,645 @@ // Buffer APIs -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result)); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result)); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result)); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::asum(queue, n, x, incx, result)); } -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy)); } -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy)); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy)); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpy(queue, n, alpha, x, incx, y, incy)); } -void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy)); } -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy)); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy)); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy)); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, 
+ sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy)); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy)); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy)); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::copy(queue, n, x, incx, y, incy)); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result)); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result)); } -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer 
&result) { +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result)); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dot(queue, n, x, incx, y, incy, result)); } -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotc(queue, n, x, incx, y, incy, result)); } -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotc(queue, n, x, incx, y, incy, result)); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotu(queue, n, x, incx, y, incy, result)); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void 
dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::dotu(queue, n, x, incx, y, incy, result)); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result)); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result)); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result)); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::nrm2(queue, n, x, incx, result)); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s)); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + 
std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s)); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s)); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rot(queue, n, x, incx, y, incy, c, s)); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s)); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s)); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s)); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotg(queue, a, b, c, s)); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotm(queue, n, x, incx, y, incy, param)); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotm(queue, n, x, incx, y, incy, param)); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotmg(queue, d1, d2, x1, y1, param)); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::rotmg(queue, d1, d2, x1, y1, param)); } -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void scal(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::scal(queue, n, alpha, x, incx)); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy)); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy)); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy)); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::swap(queue, n, x, incx, y, incy)); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result)); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result)); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result)); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamax(queue, n, x, incx, result)); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result)); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - 
sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result)); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result)); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::iamin(queue, n, x, incx, result)); } // USM APIs -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies)); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies)); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies)); } -sycl::event 
asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::asum(queue, n, x, incx, result, dependencies)); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *y, std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); +sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* y, std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); +sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpy(queue, 
n, alpha, x, incx, y, incy, dependencies)); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies)); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); +sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); +sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const 
std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies)); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies)); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies)); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - 
std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies)); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::copy(queue, n, x, incx, y, incy, dependencies)); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, 
std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result, dependencies)); +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result, dependencies)); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dot(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event 
dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies)); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies)); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies)); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double 
*result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies)); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies)); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::nrm2(queue, n, x, incx, result, dependencies)); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies)); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies)); } -sycl::event rot(sycl::queue 
&queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies)); } -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies)); } -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies)); } -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies)); } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies)); } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* 
b, double* c, + std::complex* s, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotg(queue, a, b, c, s, dependencies)); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies)); +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float* param, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies)); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double *param, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies)); +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, double* param, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies)); } -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies)); } -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies)); } -sycl::event scal(sycl::queue &queue, 
std::int64_t n, float alpha, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, std::int64_t incx, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, std::int64_t incx, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event 
scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::scal(queue, n, alpha, x, incx, dependencies)); } -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies)); } -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies)); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies)); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::swap(queue, n, x, incx, y, incy, dependencies)); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { 
+sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies)); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies)); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies)); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamax(queue, n, x, incx, result, dependencies)); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies)); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { 
+sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies)); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies)); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::iamin(queue, n, x, incx, result, dependencies)); } diff --git a/src/blas/backends/mkl_common/mkl_level2.cxx b/src/blas/backends/mkl_common/mkl_level2.cxx index f2d0a5561..0b0f40c6c 100644 --- a/src/blas/backends/mkl_common/mkl_level2.cxx +++ b/src/blas/backends/mkl_common/mkl_level2.cxx @@ -19,844 +19,1022 @@ // Buffer APIs -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy)); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, 
n, + alpha, a, lda, x, incx, beta, y, incy)); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy)); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy)); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy)); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy)); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy)); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + 
std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy)); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, + ku, alpha, a, lda, x, incx, beta, y, incy)); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, + ku, alpha, a, lda, x, incx, beta, y, incy)); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, 
sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy)); +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, + ku, alpha, a, lda, x, incx, beta, y, incy)); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy)); +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, + ku, alpha, a, lda, x, incx, beta, y, incy)); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, 
a, lda)); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda)); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda)); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda)); } -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::geru(queue, m, n, alpha, x, incx, 
y, incy, a, lda)); } -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { RETHROW_ONEMKL_EXCEPTIONS(blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda)); } -void hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy)); +void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, + lda, x, incx, beta, y, incy)); } -void hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy)); +void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, + lda, x, 
incx, beta, y, incy)); } -void hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy)); +void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, + lda, x, incx, beta, y, incy)); } -void hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy)); +void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, + lda, x, incx, beta, y, incy)); } -void her(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); +void her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); } -void her(sycl::queue 
&queue, uplo uplo, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); +void her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); } -void her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); +void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); } -void her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); +void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); } -void hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, 
sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, + incx, beta, y, incy)); } -void hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, + incx, beta, y, incy)); } -void hpr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); +void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); } -void hpr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); +void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { + 
RETHROW_ONEMKL_EXCEPTIONS( + blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); } -void hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); +void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); } -void hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); +void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); } -void sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy)); +void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, + lda, x, incx, 
beta, y, incy)); } -void sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy)); +void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, + lda, x, incx, beta, y, incy)); } -void spmv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, + incx, beta, y, incy)); } -void spmv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, + incx, beta, y, incy)); } -void spr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a) { 
- RETHROW_ONEMKL_EXCEPTIONS(blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); +void spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); } -void spr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); +void spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a)); } -void spr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); +void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); } -void spr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); +void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a)); } -void symv(sycl::queue &queue, uplo uplo, 
std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy)); +void symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, + lda, x, incx, beta, y, incy)); } -void symv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy)); +void symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, + lda, x, incx, beta, y, incy)); } -void syr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); +void syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); } -void syr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr(queue, 
detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); +void syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda)); } -void syr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); +void syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); } -void syr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); +void syr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda)); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - 
sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx)); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpmv(sycl::queue &queue, uplo uplo, 
transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - 
RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, 
std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx)); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, 
lda, x, incx)); +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, 
std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx)); +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx)); } // USM APIs -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const 
std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gbmv(sycl::queue &queue, 
transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); -} - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex 
beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemv(queue, detail::get_onemkl_transpose(trans), m, n, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, + kl, ku, alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, + kl, ku, alpha, a, lda, x, incx, beta, y, incy, + dependencies)); +} + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, + kl, ku, alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } 
-sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, - dependencies)); + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gbmv(queue, detail::get_onemkl_transpose(trans), m, n, + kl, ku, alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double 
alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies)); } -sycl::event hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, 
detail::get_onemkl_uplo(uplo), n, k, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } - -sycl::event hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies)); + +sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hbmv(queue, detail::get_onemkl_uplo(uplo), n, k, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } -sycl::event hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, lda, x, incx, beta, y, incy, dependencies)); } -sycl::event hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - 
std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, lda, x, incx, beta, y, incy, dependencies)); } -sycl::event her(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda, dependencies)); +sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, + incx, a, lda, dependencies)); } -sycl::event her(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda, dependencies)); +sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, + incx, a, lda, dependencies)); } -sycl::event her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - 
const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, lda, dependencies)); } -sycl::event her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, lda, dependencies)); } -sycl::event hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy, dependencies)); +sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + 
std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, x, incx, beta, y, incy, dependencies)); } -sycl::event hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy, dependencies)); +sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, x, incx, beta, y, incy, dependencies)); } -sycl::event hpr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); +sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); } -sycl::event hpr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); +sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + 
const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::hpr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); } -sycl::event hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, dependencies)); +sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, dependencies)); } -sycl::event hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, dependencies)); +sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hpr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, dependencies)); } -sycl::event sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, 
detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } -sycl::event sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::sbmv(queue, detail::get_onemkl_uplo(uplo), n, k, + alpha, a, lda, x, incx, beta, y, incy, + dependencies)); } -sycl::event spmv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy, dependencies)); +sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, x, incx, beta, y, incy, dependencies)); } 
-sycl::event spmv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, x, incx, beta, y, incy, dependencies)); +sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* a, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spmv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, x, incx, beta, y, incy, dependencies)); } -sycl::event spr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); +sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); } -sycl::event spr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); +sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::spr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, dependencies)); } -sycl::event spr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, 
std::int64_t incy, float *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, dependencies)); +sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, dependencies)); } -sycl::event spr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, dependencies)); +sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::spr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, dependencies)); } -sycl::event symv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, lda, x, incx, beta, y, incy, dependencies)); } -sycl::event symv(sycl::queue &queue, uplo 
uplo, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, a, lda, x, incx, beta, y, incy, dependencies)); +sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symv(queue, detail::get_onemkl_uplo(uplo), n, alpha, + a, lda, x, incx, beta, y, incy, dependencies)); } -sycl::event syr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda, dependencies)); +sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, + incx, a, lda, dependencies)); } -sycl::event syr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, a, lda, dependencies)); +sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, + incx, a, lda, dependencies)); } -sycl::event syr2(sycl::queue &queue, 
uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, lda, dependencies)); } -sycl::event syr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, x, incx, y, incy, a, lda, dependencies)); +sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2(queue, detail::get_onemkl_uplo(uplo), n, alpha, + x, incx, y, incy, a, lda, dependencies)); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& 
dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, 
const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbsv(sycl::queue& queue, 
uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tbsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), 
+ detail::get_onemkl_diag(diag), n, k, a, lda, x, incx, dependencies)); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, 
std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); -} +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +} + +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +} + +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +} + +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& 
dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +} -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); -} - -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); -} - -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); -} - -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); -} +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::tpsv(queue, detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, x, incx, dependencies)); +} -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trmv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); -} +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +} -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsv(queue, 
detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + blas_major::trsv(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, a, lda, x, incx, dependencies)); } diff --git a/src/blas/backends/mkl_common/mkl_level3.cxx b/src/blas/backends/mkl_common/mkl_level3.cxx index 62d26128e..e76dac543 100644 --- a/src/blas/backends/mkl_common/mkl_level3.cxx +++ b/src/blas/backends/mkl_common/mkl_level3.cxx @@ -19,501 +19,608 @@ // Buffer APIs -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + 
std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, 
std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + 
detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, 
beta, c, ldc)); -} - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -} + RETHROW_ONEMKL_EXCEPTIONS(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), + detail::get_onemkl_transpose(transb), m, n, k, alpha, + a, lda, b, ldb, beta, c, ldc)); +} -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, ldc)); -} +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); +} -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, 
std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); -} - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); -} +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); +} -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); -} +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); +} -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, 
std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); -} +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::symm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); +} -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); +} + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::hemm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, + lda, b, ldb, beta, c, ldc)); 
+} + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t 
n, std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, beta, c, ldc)); +} + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, b, ldb, beta, c, ldc)); +} + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, b, ldb, beta, c, ldc)); +} + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, b, ldb, beta, c, ldc)); +} + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, 
b, ldb, beta, c, ldc)); } - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc)); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, b, ldb, beta, c, ldc)); } - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, a, + lda, b, ldb, beta, c, ldc)); } - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, 
detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc)); -} - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, 
std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); -} - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); -} - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trmm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); +} + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trmm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); +} + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trmm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trmm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); +void 
trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trsm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trsm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trsm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, - ldb)); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS( + blas_major::trsm(queue, detail::get_onemkl_side(left_right), + detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), + detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, b, ldb)); } // USM APIs -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, - const 
bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm(queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, alpha, a, lda, b, ldb, beta, c, - ldc, dependencies)); -} - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, 
detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, beta, c, ldc, - dependencies)); -} - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - 
std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex 
*a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k(queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, alpha, a, lda, b, ldb, beta, c, ldc, - dependencies)); -} - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); -} - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); -} - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, + const std::complex* a, 
std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::gemm( + queue, detail::get_onemkl_transpose(transa), detail::get_onemkl_transpose(transb), m, n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, const float* a, 
std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::symm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const 
std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::hemm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), m, n, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, 
detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syrk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::herk(queue, detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(trans), n, k, alpha, + a, lda, beta, c, ldc, dependencies)); +} + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, 
transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::syr2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose 
trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::her2k( + queue, detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(trans), n, k, + alpha, a, lda, b, ldb, beta, c, ldc, dependencies)); +} + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); +} + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); +} + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); + 
const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); -} - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); -} - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, 
dependencies)); -} - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trmm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); +} + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); +} + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); +} + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), 
detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm(queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, lda, - b, ldb, dependencies)); + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(blas_major::trsm( + queue, detail::get_onemkl_side(left_right), detail::get_onemkl_uplo(upper_lower), + detail::get_onemkl_transpose(transa), detail::get_onemkl_diag(unit_diag), m, n, alpha, a, + lda, b, ldb, dependencies)); } diff --git a/src/blas/backends/netlib/netlib_batch.cxx b/src/blas/backends/netlib/netlib_batch.cxx index 7a2839dd4..5af30b80f 100644 --- a/src/blas/backends/netlib/netlib_batch.cxx +++ b/src/blas/backends/netlib/netlib_batch.cxx @@ -19,8 +19,8 @@ // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t 
stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -30,8 +30,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_ #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -41,8 +41,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64 #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -52,8 +52,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -63,9 +63,9 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer #endif } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, - int64_t stridey, int64_t batch_size) { +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, 
int64_t stridey, + int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -74,8 +74,8 @@ void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -85,9 +85,9 @@ void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -97,9 +97,9 @@ void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -109,10 +109,10 @@ void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, float beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { 
+void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, + int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -121,11 +121,10 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, floa #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, - int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -134,12 +133,11 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, doub #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, - int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, + int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -148,11 +146,11 @@ void 
gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &x, - int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); @@ -162,10 +160,10 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, + int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -174,10 +172,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + 
sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -186,10 +184,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); @@ -199,10 +197,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); @@ -212,11 +210,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& 
queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -225,11 +222,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -238,12 +234,11 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, - int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, + int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw 
unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -252,11 +247,11 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -266,10 +261,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -279,10 +274,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - 
float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -292,10 +287,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -305,10 +300,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for 
column_major layout"); @@ -318,9 +313,9 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -330,9 +325,9 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -342,10 +337,10 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR 
throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -355,10 +350,10 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -368,10 +363,9 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -380,9 +374,9 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t 
batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); @@ -392,11 +386,10 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, + int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -405,10 +398,10 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); @@ -418,9 +411,9 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t 
lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -429,9 +422,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, f #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -440,9 +433,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, d #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); @@ -452,9 +445,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR 
throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); @@ -464,8 +457,8 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -475,8 +468,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, f #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -486,8 +479,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, d #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -497,8 +490,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, 
sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -508,10 +501,10 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -520,10 +513,10 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -532,11 +525,11 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); @@ -546,11 +539,11 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); @@ -562,9 +555,9 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -573,9 +566,9 @@ sycl::event 
copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, - double **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -584,10 +577,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -596,10 +588,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -608,10 +599,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, 
int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -620,10 +610,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t in #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -632,10 +621,10 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t i #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, - int64_t incx, std::int64_t stridex, std::complex *y, int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -644,10 +633,10 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, - int64_t incx, std::int64_t stridex, std::complex *y, - int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + 
std::int64_t stridex, std::complex* y, int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -656,9 +645,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, - int64_t *incx, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -667,9 +656,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -678,10 +667,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const doub #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t 
group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -690,10 +679,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alph #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -702,9 +691,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alp #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, - int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -713,9 +702,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float * #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, - int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& 
dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -724,10 +713,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -736,10 +725,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -748,11 +737,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *x, int64_t incx, int64_t stride_x, float beta, float *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const 
float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -761,11 +749,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *x, int64_t incx, int64_t stride_x, double beta, double *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, + int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -774,12 +762,12 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, + int64_t incy, int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", 
"for column_major layout"); #endif @@ -788,12 +776,12 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, + int64_t incy, int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -802,11 +790,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, const float **x, - int64_t *incx, float *beta, float **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -815,11 +802,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, const double **x, - int64_t *incx, 
double *beta, double **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -828,11 +814,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -841,12 +827,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* 
beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -855,10 +840,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const float *a, int64_t lda, int64_t stride_a, const float *x, - int64_t incx, int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -867,10 +852,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -879,11 +864,11 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, 
int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -892,11 +877,11 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -905,10 +890,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const 
std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -917,10 +902,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -929,11 +914,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -942,11 +926,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector 
&dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -955,11 +938,11 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -968,11 +951,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef 
COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -981,12 +964,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -995,12 +978,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1009,11 +992,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose 
*transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1022,11 +1005,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1035,11 +1018,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t 
*ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1048,11 +1031,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1061,11 +1044,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - int64_t stride_a, const float *b, int64_t ldb, int64_t stride_b, - float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, 
float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t ldb, int64_t stride_b, float beta, float* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1074,11 +1057,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - int64_t stride_a, const double *b, int64_t ldb, int64_t stride_b, - double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1087,13 +1070,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, - int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, + int64_t ldc, 
int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1102,13 +1084,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, - int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1117,11 +1098,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, + int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw 
unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1130,11 +1111,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1143,11 +1124,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1156,11 +1137,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, 
float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1169,11 +1150,10 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, - int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1182,11 +1162,10 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, - int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose 
trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1195,11 +1174,11 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1208,11 +1187,11 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif 
@@ -1221,11 +1200,10 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1234,11 +1212,10 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1247,11 +1224,11 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const 
std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1260,12 +1237,11 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1274,10 +1250,10 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", 
"for column_major layout"); #endif @@ -1286,10 +1262,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1298,11 +1274,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1311,11 +1287,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector 
&dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1324,10 +1300,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1336,10 +1312,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1348,11 +1324,11 @@ sycl::event 
syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex beta, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1361,11 +1337,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex beta, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1374,10 +1350,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const 
float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1386,10 +1362,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1398,10 +1374,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1410,10 +1386,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, 
std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1422,9 +1398,9 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1433,9 +1409,9 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1444,10 +1420,10 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, 
int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1456,10 +1432,10 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1468,11 +1444,11 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -1481,11 +1457,11 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose 
transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -1494,12 +1470,12 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -1508,12 +1484,12 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const 
std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif diff --git a/src/blas/backends/netlib/netlib_common.hpp b/src/blas/backends/netlib/netlib_common.hpp index 51552f2ce..b1848d277 100644 --- a/src/blas/backends/netlib/netlib_common.hpp +++ b/src/blas/backends/netlib/netlib_common.hpp @@ -79,19 +79,19 @@ inline CBLAS_OFFSET convert_to_cblas_offset(offset offsetc) { // host_task automatically uses run_on_host_intel if it is supported by the // compiler. Otherwise, it falls back to single_task. template -static inline auto host_task_internal(H &cgh, F f, int) -> decltype(cgh.host_task(f)) { +static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) { return cgh.host_task(f); } template -static inline void host_task_internal(H &cgh, F f, long) { +static inline void host_task_internal(H& cgh, F f, long) { #ifndef __SYCL_DEVICE_ONLY__ cgh.template single_task(f); #endif } template -static inline void host_task(H &cgh, F f) { +static inline void host_task(H& cgh, F f) { (void)host_task_internal(cgh, f, 0); } diff --git a/src/blas/backends/netlib/netlib_extensions.cxx b/src/blas/backends/netlib/netlib_extensions.cxx index 8e94cb880..d0c13ebbd 100644 --- a/src/blas/backends/netlib/netlib_extensions.cxx +++ b/src/blas/backends/netlib/netlib_extensions.cxx @@ -19,11 +19,10 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void 
gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -32,11 +31,10 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -45,11 +43,10 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -58,11 +55,10 @@ void gemm_bias(sycl::queue &queue, transpose 
transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -71,9 +67,9 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); @@ -83,9 +79,9 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); @@ 
-95,10 +91,10 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -107,10 +103,10 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -119,8 +115,8 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -129,8 +125,8 @@ void omatcopy(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, float a #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -139,9 +135,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -150,9 +146,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -161,9 +157,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + 
sycl::buffer& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -172,9 +168,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -183,9 +179,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -194,9 +190,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::c #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); @@ -206,8 
+202,8 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -216,8 +212,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float a #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -226,8 +222,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -236,8 +232,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -246,9 +242,9 @@ void imatcopy(sycl::queue &queue, transpose trans, 
int64_t m, int64_t n, std::co #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -257,9 +253,9 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -268,10 +264,10 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -280,10 +276,10 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void 
omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -294,11 +290,11 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const int8_t *a, int64_t lda, int8_t ao, const int8_t *b, int64_t ldb, - int8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -307,11 +303,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const int8_t *a, int64_t lda, int8_t ao, const uint8_t *b, int64_t ldb, - uint8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, 
const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -320,11 +316,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const uint8_t *a, int64_t lda, uint8_t ao, const int8_t *b, int64_t ldb, - int8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -333,11 +329,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const uint8_t *a, int64_t lda, uint8_t ao, const uint8_t *b, int64_t ldb, - uint8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -346,10 
+342,10 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -358,10 +354,10 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -370,11 +366,11 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, 
const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -383,11 +379,11 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -396,9 +392,9 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -407,9 +403,9 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, 
int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -418,10 +414,10 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -430,10 +426,10 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -442,9 +438,9 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const 
std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -453,9 +449,9 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -464,10 +460,10 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -476,10 +472,10 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* 
a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -488,9 +484,9 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -499,9 +495,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -510,9 +506,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -521,9 +517,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } 
-sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -532,10 +528,10 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -544,10 +540,10 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -556,11 +552,11 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event 
omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -569,11 +565,11 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -581,5 +577,3 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 throw unimplemented("blas", "omatadd", "for row_major layout"); #endif } - - diff --git a/src/blas/backends/netlib/netlib_level1.cpp b/src/blas/backends/netlib/netlib_level1.cpp index ec6b1eb09..aa55ec419 100644 --- a/src/blas/backends/netlib/netlib_level1.cpp +++ b/src/blas/backends/netlib/netlib_level1.cpp @@ -43,7 +43,7 @@ inline double abs_val(std::complex val) { return std::abs(val.real()) + std::abs(val.imag()); } -int cblas_isamin(int n, const float *x, int incx) { 
+int cblas_isamin(int n, const float* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -65,7 +65,7 @@ int cblas_isamin(int n, const float *x, int incx) { return min_idx; } -int cblas_idamin(int n, const double *x, int incx) { +int cblas_idamin(int n, const double* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -87,7 +87,7 @@ int cblas_idamin(int n, const double *x, int incx) { return min_idx; } -int cblas_icamin(int n, const std::complex *x, int incx) { +int cblas_icamin(int n, const std::complex* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -109,7 +109,7 @@ int cblas_icamin(int n, const std::complex *x, int incx) { return min_idx; } -int cblas_izamin(int n, const std::complex *x, int incx) { +int cblas_izamin(int n, const std::complex* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -131,7 +131,7 @@ int cblas_izamin(int n, const std::complex *x, int incx) { return min_idx; } -void cblas_csrot(const int n, std::complex *cx, const int incx, std::complex *cy, +void cblas_csrot(const int n, std::complex* cx, const int incx, std::complex* cy, const int incy, const float c, const float s) { if (n < 1) return; @@ -158,7 +158,7 @@ void cblas_csrot(const int n, std::complex *cx, const int incx, std::comp } } -void cblas_zdrot(const int n, std::complex *zx, const int incx, std::complex *zy, +void cblas_zdrot(const int n, std::complex* zx, const int incx, std::complex* zy, const int incy, const double c, const double s) { if (n < 1) return; @@ -185,8 +185,8 @@ void cblas_zdrot(const int n, std::complex *zx, const int incx, std::com } } -void cblas_crotg(std::complex *ca, const std::complex *cb, float *c, - std::complex *s) { +void cblas_crotg(std::complex* ca, const std::complex* cb, float* c, + std::complex* s) { if (std::abs(ca[0]) == 0) { c[0] = 0.0; s[0] = std::complex(1.0, 0.0); @@ -203,8 +203,8 @@ void cblas_crotg(std::complex *ca, const std::complex *cb, float * } } -void cblas_zrotg(std::complex *ca, const std::complex *cb, double *c, - 
std::complex *s) { +void cblas_zrotg(std::complex* ca, const std::complex* cb, double* c, + std::complex* s) { if (std::abs(ca[0]) == 0) { c[0] = 0.0; s[0] = std::complex(1.0, 0.0); diff --git a/src/blas/backends/netlib/netlib_level1.cxx b/src/blas/backends/netlib/netlib_level1.cxx index 9f953dc5b..5514a86c1 100644 --- a/src/blas/backends/netlib/netlib_level1.cxx +++ b/src/blas/backends/netlib/netlib_level1.cxx @@ -19,9 +19,9 @@ // Buffer APIs -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -31,9 +31,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -43,9 +43,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -55,9 +55,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - 
queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -67,9 +67,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void axpy(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -79,9 +79,9 @@ void axpy(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, }); } -void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -91,34 +91,34 @@ void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer & }); } -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { - ::cblas_caxpy((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_caxpy((const int)n, (const void*)&alpha, 
accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zaxpy((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_zaxpy((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -127,8 +127,8 @@ void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x #endif } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -137,9 +137,9 @@ void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer #endif } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, 
int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -148,9 +148,9 @@ void axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -159,9 +159,9 @@ void axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -171,9 +171,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -183,9 +183,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto 
accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -195,9 +195,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -207,9 +207,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -221,9 +221,9 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -235,9 +235,9 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void dot(sycl::queue& queue, int64_t 
n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -249,10 +249,10 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -264,10 +264,10 @@ void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -279,10 +279,10 @@ void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = 
result.get_access(cgh); @@ -294,10 +294,10 @@ void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -309,9 +309,9 @@ void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -320,9 +320,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.template get_access(cgh); auto accessor_result = result.template get_access(cgh); host_task(cgh, [=]() { @@ -331,9 +331,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t in }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = 
x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -342,9 +342,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -353,9 +353,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -364,9 +364,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -375,9 +375,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t in }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = 
result.get_access(cgh); host_task(cgh, [=]() { @@ -386,9 +386,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -397,9 +397,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.template get_access(cgh); auto accessor_result = result.template get_access(cgh); host_task(cgh, [=]() { @@ -409,9 +409,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -421,9 +421,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ 
-433,9 +433,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -445,9 +445,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, float c, float s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, float c, float s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -457,9 +457,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, double c, double s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, double c, double s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -469,9 +469,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = 
x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -482,9 +482,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -495,9 +495,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -509,9 +509,9 @@ void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer }); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -523,10 +523,10 @@ void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = 
b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -538,10 +538,10 @@ void rotg(sycl::queue &queue, sycl::buffer, 1> &a, }); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -553,9 +553,9 @@ void rotg(sycl::queue &queue, sycl::buffer, 1> &a, }); } -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_param = param.get_access(cgh); @@ -566,9 +566,9 @@ void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_param = param.get_access(cgh); @@ -579,9 +579,9 @@ void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, 
sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_d1 = d1.get_access(cgh); auto accessor_d2 = d2.get_access(cgh); auto accessor_x1 = x1.get_access(cgh); @@ -593,9 +593,9 @@ void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_d1 = d1.get_access(cgh); auto accessor_d2 = d2.get_access(cgh); auto accessor_x1 = x1.get_access(cgh); @@ -607,8 +607,8 @@ void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_sscal((const int)n, (const float)alpha, accessor_x.GET_MULTI_PTR, @@ -617,8 +617,8 @@ void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, }); } -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_dscal((const int)n, (const double)alpha, accessor_x.GET_MULTI_PTR, @@ -627,20 +627,20 @@ void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer & }); } -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { - 
::cblas_cscal((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_cscal((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)std::abs(incx)); }); }); } -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_csscal((const int)n, (const float)alpha, accessor_x.GET_MULTI_PTR, @@ -649,20 +649,20 @@ void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zscal((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_zscal((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)std::abs(incx)); }); }); } -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_zdscal((const int)n, (const double)alpha, accessor_x.GET_MULTI_PTR, @@ -671,9 +671,9 @@ void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); 
auto accessor_result = result.get_access(cgh); @@ -685,9 +685,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -697,9 +697,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -709,9 +709,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -721,9 +721,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = 
y.get_access(cgh); host_task(cgh, [=]() { @@ -735,9 +735,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> & // USM APIs -sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -748,9 +748,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -761,9 +761,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -774,9 +774,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event asum(sycl::queue &queue, int64_t n, 
const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -787,9 +787,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -801,9 +801,9 @@ sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -816,41 +816,41 @@ sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, i return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, 
- const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_caxpy((const int)n, (const void *)&alpha, x, (const int)incx, y, + ::cblas_caxpy((const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy); }); }); return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zaxpy((const int)n, (const void *)&alpha, x, (const int)incx, y, + ::cblas_zaxpy((const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy); }); }); return done; } -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -859,9 +859,9 @@ sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, in #endif } -sycl::event 
axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -870,10 +870,10 @@ sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, #endif } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -882,10 +882,10 @@ sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -894,9 +894,9 @@ sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* y, + int64_t 
incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -907,9 +907,9 @@ sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -920,10 +920,10 @@ sycl::event copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -934,10 +934,10 @@ sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -948,9 +948,9 @@ sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -962,9 +962,9 @@ sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, con return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, const double *y, - int64_t incy, double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const double* x, int64_t incx, const double* y, + int64_t incy, double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -976,9 +976,9 @@ sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, co return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -990,10 +990,10 @@ sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, con return done; } -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1005,10 +1005,10 @@ sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1020,10 +1020,10 @@ sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, 
int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1035,10 +1035,10 @@ sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1050,9 +1050,9 @@ sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1063,9 +1063,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + 
const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1076,9 +1076,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1089,9 +1089,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1102,9 +1102,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < 
num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1115,9 +1115,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, i return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1128,9 +1128,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1141,9 +1141,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1154,9 +1154,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, 
return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1167,9 +1167,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1180,9 +1180,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1193,9 +1193,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies) { - auto done = 
queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1206,9 +1206,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float c, float s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float c, float s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1221,9 +1221,9 @@ sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, return done; } -sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double c, double s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double c, double s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1236,10 +1236,10 @@ sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double * return done; } -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event 
rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1252,10 +1252,10 @@ sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t i return done; } -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1268,9 +1268,9 @@ sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t return done; } -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1280,9 +1280,9 @@ sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, return done; } -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1292,9 +1292,9 @@ sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, return done; } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1304,9 +1304,9 @@ sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex return done; } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1316,9 +1316,9 @@ sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotm(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float* param, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1330,9 +1330,9 @@ sycl::event rotm(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y return done; } -sycl::event rotm(sycl::queue 
&queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double *param, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotm(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double* param, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1344,9 +1344,9 @@ sycl::event rotm(sycl::queue &queue, int64_t n, double *x, int64_t incx, double return done; } -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1357,9 +1357,9 @@ sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, return done; } -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1370,9 +1370,9 @@ sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double return done; } -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = 
queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1384,9 +1384,9 @@ sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t i return done; } -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1398,23 +1398,23 @@ sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t return done; } -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cscal((const int)n, (const void *)&alpha, x, (const int)std::abs(incx)); + ::cblas_cscal((const int)n, (const void*)&alpha, x, (const int)std::abs(incx)); }); }); return done; } -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, float 
alpha, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1426,23 +1426,23 @@ sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex return done; } -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zscal((const int)n, (const void *)&alpha, x, (const int)std::abs(incx)); + ::cblas_zscal((const int)n, (const void*)&alpha, x, (const int)std::abs(incx)); }); }); return done; } -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1454,10 +1454,10 @@ sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for 
(int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1470,9 +1470,9 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 return done; } -sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1483,9 +1483,9 @@ sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y return done; } -sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1496,10 +1496,10 @@ sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double return done; } -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1510,10 +1510,10 @@ sycl::event swap(sycl::queue &queue, 
int64_t n, std::complex *x, int64_t return done; } -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); diff --git a/src/blas/backends/netlib/netlib_level2.cxx b/src/blas/backends/netlib/netlib_level2.cxx index 156ed133b..8e8d74446 100644 --- a/src/blas/backends/netlib/netlib_level2.cxx +++ b/src/blas/backends/netlib/netlib_level2.cxx @@ -19,10 +19,10 @@ // Buffer APIs -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -36,10 +36,10 @@ void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + 
int64_t incx, double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -53,46 +53,46 @@ void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, + (const int)kl, (const int)ku, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + 
queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, + (const int)kl, (const int)ku, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -105,10 +105,10 @@ void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -121,44 +121,44 @@ void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alph }); } -void gemv(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const 
int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -170,10 +170,10 @@ void ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -185,142 +185,142 @@ void ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const 
int)lda); }); }); } -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void 
geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hbmv(sycl::queue& queue, uplo 
upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - 
queue.submit([&](sycl::handler &cgh) { +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -331,10 +331,10 @@ void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, }); } -void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -345,78 +345,78 @@ void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void her2(sycl::queue &queue, uplo 
upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { ::cblas_cher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { ::cblas_zher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, 
int64_t n, std::complex alpha, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const void*)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const void*)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, 
int64_t incx, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -427,10 +427,10 @@ void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, }); } -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -441,42 +441,42 @@ void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { ::cblas_chpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_ap.GET_MULTI_PTR); }); }); } -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + 
sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_ap.GET_MULTI_PTR); }); }); } -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -489,10 +489,10 @@ void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alph }); } -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -505,10 +505,10 @@ void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alp }); } -void spmv(sycl::queue &queue, uplo upper_lower, 
int64_t n, float alpha, sycl::buffer &ap, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& ap, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -521,10 +521,10 @@ void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &ap, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer& ap, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -537,9 +537,9 @@ void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -550,9 +550,9 @@ void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buf }); } -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, 
sycl::buffer& x, + int64_t incx, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -563,9 +563,9 @@ void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::bu }); } -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); @@ -577,9 +577,9 @@ void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); @@ -591,10 +591,10 @@ void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, float beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a 
= a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -607,10 +607,10 @@ void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -623,9 +623,9 @@ void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -636,9 +636,9 @@ void syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buf }); } -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -649,10 +649,10 @@ void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::bu }); } 
-void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -665,10 +665,10 @@ void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -681,10 +681,10 @@ void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -696,10 +696,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, 
sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -711,10 +711,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -726,10 +726,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -741,10 +741,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -756,10 +756,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -771,10 +771,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -786,10 +786,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t 
lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -801,9 +801,9 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -814,9 +814,9 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -827,10 +827,10 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -841,10 +841,10 @@ void tpmv(sycl::queue &queue, uplo upper_lower, 
transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -855,9 +855,9 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -868,9 +868,9 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -881,10 +881,10 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -895,10 +895,10 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -909,9 +909,9 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -922,9 +922,9 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto 
accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -935,10 +935,10 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -949,10 +949,10 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -963,9 +963,9 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -976,9 +976,9 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -989,10 +989,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -1003,10 +1003,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -1019,10 +1019,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, // USM APIs -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, 
const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1036,11 +1036,11 @@ sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int6 return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1054,48 +1054,48 @@ sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int6 return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, 
int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, a, (const int)lda, x, - (const int)incx, (const void *)&beta, y, (const int)incy); + (const int)kl, (const int)ku, (const void*)&alpha, a, (const int)lda, x, + (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, a, (const int)lda, x, - (const int)incx, (const void *)&beta, y, (const int)incy); + (const int)kl, (const int)ku, (const void*)&alpha, a, (const int)lda, x, + (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event 
gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1109,10 +1109,10 @@ sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, floa return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1126,48 +1126,48 @@ sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, doub return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, 
const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const float *x, int64_t incx, - const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies) { - auto done = 
queue.submit([&](sycl::handler &cgh) { +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, const float* x, int64_t incx, + const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1180,10 +1180,10 @@ sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const flo return done; } -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1196,152 +1196,152 @@ sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const do return done; } -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cgerc(MAJOR, (const 
int)m, (const int)n, (const void *)&alpha, x, + ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, 
y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const 
int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chemv(MAJOR, convert_to_cblas_uplo(upper_lower), 
(const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1354,10 +1354,10 @@ sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float 
alpha, return done; } -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1370,82 +1370,82 @@ sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, a, + (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event 
her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, a, + (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *ap, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* ap, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, ap, x, (const int)incx, (const void *)&beta, y, + (const void*)&alpha, ap, x, (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *ap, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, 
std::complex alpha, + const std::complex* ap, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, ap, x, (const int)incx, (const void *)&beta, y, + (const void*)&alpha, ap, x, (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1458,10 +1458,10 @@ sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, return done; } -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1474,44 +1474,44 @@ sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } 
-sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, ap); + (const void*)&alpha, x, (const int)incx, y, (const int)incy, ap); }); }); return done; } -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, ap); + (const void*)&alpha, x, (const int)incx, y, (const int)incy, ap); }); }); return done; } -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float 
beta, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1525,10 +1525,10 @@ sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, flo return done; } -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1542,10 +1542,10 @@ sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, dou return done; } -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *ap, - const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* ap, + const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for 
(int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1559,10 +1559,10 @@ sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *ap, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* ap, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1576,9 +1576,9 @@ sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *ap, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* ap, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1591,9 +1591,9 @@ sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, co return done; } -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *ap, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* ap, const std::vector& dependencies) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1606,10 +1606,10 @@ sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, c return done; } -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1622,10 +1622,10 @@ sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1638,10 +1638,10 @@ sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { 
+sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + int64_t lda, const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1655,10 +1655,10 @@ sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* a, + int64_t lda, const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1672,9 +1672,9 @@ sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *a, int64_t lda, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, int64_t lda, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1687,10 +1687,10 @@ sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, co return done; } -sycl::event syr(sycl::queue 
&queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1703,10 +1703,10 @@ sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, c return done; } -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1720,10 +1720,10 @@ sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < 
num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1737,10 +1737,10 @@ sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1754,10 +1754,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1771,10 +1771,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1788,10 +1788,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1805,10 +1805,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1822,10 +1822,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } 
-sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1839,10 +1839,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1856,10 +1856,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const 
std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1873,10 +1873,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *ap, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* ap, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1889,10 +1889,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *ap, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* ap, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1905,10 +1905,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event 
tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1921,10 +1921,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1937,10 +1937,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *ap, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* ap, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1953,10 +1953,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, 
int64_t n, - const double *ap, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* ap, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1969,10 +1969,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1985,10 +1985,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2001,10 +2001,10 @@ sycl::event 
tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2018,10 +2018,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2035,10 +2035,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* b, int64_t incx, + const std::vector& 
dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2052,10 +2052,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2069,10 +2069,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2086,10 +2086,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done 
= queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2103,10 +2103,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2120,10 +2120,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); diff --git a/src/blas/backends/netlib/netlib_level3.cxx 
b/src/blas/backends/netlib/netlib_level3.cxx index 8bb6a04ae..2579e66e1 100644 --- a/src/blas/backends/netlib/netlib_level3.cxx +++ b/src/blas/backends/netlib/netlib_level3.cxx @@ -19,10 +19,10 @@ // Buffer APIs -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -36,10 +36,10 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -53,46 +53,46 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void 
gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, + (const int)m, (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, + (const int)m, (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const 
int)ldc); }); }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - sycl::half alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + sycl::half alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -101,9 +101,9 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -112,9 +112,9 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -123,46 +123,46 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void hemm(sycl::queue 
&queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_chemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - 
accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -174,10 +174,10 @@ void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer, 1> &a, int64_t lda, double beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer, 1>& a, int64_t lda, double beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -189,17 +189,17 @@ void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + 
sycl::buffer, 1>& b, int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_cher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb, (const float)beta, accessor_c.GET_MULTI_PTR, (const int)ldc); @@ -207,17 +207,17 @@ void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb, (const double)beta, accessor_c.GET_MULTI_PTR, (const int)ldc); @@ -225,10 +225,10 @@ void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - 
float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -242,10 +242,10 @@ void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int6 }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + double beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -259,46 +259,46 @@ void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int6 }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csymm(MAJOR, 
convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, float beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, float beta, sycl::buffer& c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto 
accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -310,10 +310,10 @@ void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, double beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, double beta, sycl::buffer& c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -325,40 +325,40 @@ void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, - accessor_a.GET_MULTI_PTR, (const int)lda, (const void *)&beta, - accessor_c.GET_MULTI_PTR, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, + (const int)lda, (const void*)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldc); }); }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - 
queue.submit([&](sycl::handler &cgh) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, - accessor_a.GET_MULTI_PTR, (const int)lda, (const void *)&beta, - accessor_c.GET_MULTI_PTR, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, + (const int)lda, (const void*)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldc); }); }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -371,10 +371,10 @@ void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { + 
queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -388,46 +388,46 @@ void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); 
auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -440,10 +440,10 @@ void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -456,43 +456,43 @@ void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, std::complex 
alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ctrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ztrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - 
queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -505,10 +505,10 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -521,34 +521,34 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ctrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, 
accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ztrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); @@ -556,10 +556,10 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans // USM APIs -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, + float beta, float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -573,11 +573,11 @@ sycl::event gemm(sycl::queue &queue, 
transpose transa, transpose transb, int64_t return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -591,50 +591,48 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, a, - (const int)lda, b, (const int)ldb, (const void *)&beta, c, - (const int)ldc); + (const int)m, 
(const int)n, (const int)k, (const void*)&alpha, a, + (const int)lda, b, (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, a, - (const int)lda, b, (const int)ldb, (const void *)&beta, c, - (const int)ldc); + (const int)m, (const int)n, (const int)k, (const void*)&alpha, a, + (const int)lda, b, (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, sycl::half beta, sycl::half *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, sycl::half beta, sycl::half* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", 
"for column_major layout"); #endif @@ -643,10 +641,10 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -655,10 +653,10 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -667,12 +665,12 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo 
upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -680,19 +678,19 @@ sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_chemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -700,18 +698,18 @@ sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_zhemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + 
(const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -725,11 +723,11 @@ sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -743,46 +741,46 @@ sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, float beta, std::complex *c, - int64_t ldc, const std::vector &dependencies) { - auto 
done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, float beta, std::complex* c, + int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, (const int)ldb, (const float)beta, c, (const int)ldc); }); }); return done; } -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, double beta, std::complex *c, - int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, double beta, std::complex* c, + int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, (const int)ldb, (const double)beta, c, (const int)ldc); }); }); return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo 
upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -797,11 +795,11 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -816,12 +814,12 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo 
upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -829,19 +827,19 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_csymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -849,17 +847,17 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_zsymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + 
(const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -873,10 +871,10 @@ sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -890,46 +888,46 @@ sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo 
upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_csyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, - (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zsyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, - (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t 
ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -943,11 +941,11 @@ sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -961,48 +959,48 @@ sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex 
beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_csyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, - (const int)ldb, (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, + (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zsyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, - (const int)ldb, (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, + (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t 
lda, - float *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1017,10 +1015,10 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1035,11 +1033,11 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1048,17 +1046,17 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ctrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1067,16 +1065,16 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ztrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { 
+sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1091,10 +1089,10 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1109,11 +1107,11 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { 
cgh.depends_on(dependencies[i]); @@ -1122,17 +1120,17 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ctrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1141,7 +1139,7 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ztrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; diff --git a/src/blas/backends/portblas/portblas_batch.cxx b/src/blas/backends/portblas/portblas_batch.cxx index e48e60890..75b1a115b 100644 --- a/src/blas/backends/portblas/portblas_batch.cxx +++ b/src/blas/backends/portblas/portblas_batch.cxx @@ -19,999 +19,1002 @@ // Buffer APIs -void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, 
float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 
1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, 
sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, 
std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, 
batch_size); } -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "axpy_batch", ""); } -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "axpy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void 
copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& 
queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, 
std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t 
stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, 
oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose 
trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, +void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", ""); } -void omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, +void omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose 
trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, +void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, +void imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, - std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + 
std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", ""); } -void omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, +void omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, + oneapi::math::transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", ""); } // USM APIs -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *k, - double *alpha, const double 
**a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::math::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, 
std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, 
std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float* beta, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t 
group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, float *c, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, double *c, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t 
stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t 
*group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, + std::int64_t* n, const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, + std::int64_t* n, const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t 
*incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* 
incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t 
stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw 
unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, 
std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue 
&queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex 
*beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* 
transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose *transa, - oneapi::math::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose* transa, + oneapi::math::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const 
std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event 
gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t 
ldc, + std::int64_t k, sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, + std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } 
-sycl::event gemm_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, 
+ oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } 
-sycl::event trsm_batch(sycl::queue &queue, oneapi::math::side *left_right, - oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - oneapi::math::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::math::side* left_right, + oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + oneapi::math::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t 
stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", " for USM"); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t 
stride_a, - float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - 
std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", " for USM"); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_common.hpp b/src/blas/backends/portblas/portblas_common.hpp index 27d68b3d4..c2713c2f5 100644 --- a/src/blas/backends/portblas/portblas_common.hpp +++ b/src/blas/backends/portblas/portblas_common.hpp @@ -54,9 +54,9 @@ template struct portblas_type; #define DEF_PORTBLAS_TYPE(onemath_t, portblas_t) \ - template <> \ + template <> \ struct portblas_type { \ - using type = portblas_t; \ + using type = portblas_t; \ }; DEF_PORTBLAS_TYPE(sycl::queue, handle_t) @@ -181,7 +181,7 @@ struct throw_if_unsupported_by_device { if (checkTypeInPack) { if (!q.get_info().has(AspectVal)) { throw math::unsupported_device("blas", message, - q.get_info()); + q.get_info()); } } } diff --git a/src/blas/backends/portblas/portblas_gemm_bias.cxx 
b/src/blas/backends/portblas/portblas_gemm_bias.cxx index 2af4d0bd2..83e39b2d9 100644 --- a/src/blas/backends/portblas/portblas_gemm_bias.cxx +++ b/src/blas/backends/portblas/portblas_gemm_bias.cxx @@ -19,72 +19,72 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, 
- sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } // USM APIs -sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, 
oneapi::math::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::int8_t *b, std::int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a, + std::int64_t lda, std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::math::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, 
std::int64_t k, float alpha, const std::uint8_t* a, + std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level1.cxx b/src/blas/backends/portblas/portblas_level1.cxx index 0a0af855c..6d1f39463 100644 --- a/src/blas/backends/portblas/portblas_level1.cxx +++ b/src/blas/backends/portblas/portblas_level1.cxx @@ -19,91 +19,91 @@ // Buffer APIs -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { throw unimplemented("blas", "dotc", ""); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { throw unimplemented("blas", "dotu", ""); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { CALL_PORTBLAS_FN(::blas::_iamax, queue, n, x, incx, result); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamax", ""); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void 
iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { CALL_PORTBLAS_FN(::blas::_iamin, queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamin", ""); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "asum", ""); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { // portBLAS asum implementation requires that result is initialized to zero // before performing the computation. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_asum, queue, n, x, incx, result); } -void axpy(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "axpy", "for complex"); } -void axpby(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { throw unimplemented("blas", "axpby", ""); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "axpby", ""); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_copy, queue, n, x, incx, y, 
incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "copy", " for complex."); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { // portBLAS dot implementation requires that result is initialized to zero // before performing the computation. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); @@ -111,288 +111,288 @@ void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::in } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", " for unmatched return type"); } #endif -void sdsdot(sycl::queue &queue, std::int64_t n, real_t sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { // portBLAS sdsdot implementation requires that result is initialized to zero // before performing the computation. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "nrm2", " for complex"); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { // portBLAS nrm2 implementation requires that result is initialized to zero // before performing the computation. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_nrm2, queue, n, x, incx, result); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, real_t c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, real_t c, real_t s) { throw unimplemented("blas", "rot", " for complex"); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, real_t c, real_t s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, real_t c, real_t s) { CALL_PORTBLAS_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + 
sycl::buffer& c, sycl::buffer& s) { CALL_PORTBLAS_FN(::blas::_rotg, queue, a, b, c, s); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { throw unimplemented("blas", "rotg", " for complex"); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { CALL_PORTBLAS_FN(::blas::_rotm, queue, n, x, incx, y, incy, param); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, real_t y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, real_t y1, sycl::buffer& param) { CALL_PORTBLAS_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param); } -void scal(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_scal, queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "scal", " for complex"); } -void scal(sycl::queue &queue, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "scal", " for complex"); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, 
sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_swap, queue, n, x, incx, y, incy); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "swap", " for complex"); } // USM APIs -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { throw unimplemented("blas", "dotc", " for USM"); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { throw unimplemented("blas", "dotu", " for USM"); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_iamax, queue, n, x, incx, result, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t 
incx, std::int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamax", " for USM"); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_iamin, queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamin", " for USM"); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, real_t *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, real_t* result, const std::vector& dependencies) { throw unimplemented("blas", "asum", " for USM"); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - real_t *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + real_t* result, const std::vector& dependencies) { // portBLAS asum implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_asum, queue, n, x, incx, result, new_dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, real_t alpha, const real_t* x, + std::int64_t incx, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "axpy", " for USM"); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, const real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, real_t alpha, const real_t* x, + std::int64_t incx, const real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", " for USM"); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, 
std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", " for USM"); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_copy, queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "copy", " for USM"); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - const real_t *y, std::int64_t incy, real_t *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + const real_t* y, std::int64_t incy, real_t* result, + const std::vector& dependencies) { // portBLAS dot implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_dot, queue, n, x, incx, y, incy, result, new_dependencies); } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { throw unimplemented("blas", "dot", " for USM"); } #endif -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, real_t sb, const real_t *x, - std::int64_t incx, const real_t *y, std::int64_t incy, real_t *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, const real_t* x, + std::int64_t incx, const real_t* y, std::int64_t incy, real_t* result, + const std::vector& dependencies) { // portBLAS sdsdot implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result, new_dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, real_t *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, real_t* result, const std::vector& dependencies) { throw unimplemented("blas", "nrm2", " for USM"); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - real_t *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + real_t* result, const std::vector& dependencies) { // portBLAS nrm2 implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_nrm2, queue, n, x, incx, result, new_dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, real_t c, real_t s, - const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, real_t c, real_t s, + const std::vector& dependencies) { throw unimplemented("blas", "rot", " for USM"); } -sycl::event rot(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, real_t c, real_t s, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, real_t *a, real_t *b, real_t *c, real_t *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, real_t* a, real_t* b, real_t* c, real_t* s, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotg, queue, a, b, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, real_t *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, real_t* c, + std::complex* s, const std::vector& dependencies) { throw unimplemented("blas", "rotg", " for USM"); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, real_t *param, const std::vector &dependencies) { +sycl::event 
rotm(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, real_t* param, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotm, queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(sycl::queue &queue, real_t *d1, real_t *d2, real_t *x1, real_t y1, real_t *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, real_t* d1, real_t* d2, real_t* x1, real_t y1, real_t* param, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, real_t alpha, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_scal, queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "scal", " for USM"); } -sycl::event scal(sycl::queue &queue, std::int64_t n, real_t alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "scal", " for USM"); } -sycl::event swap(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_swap, queue, n, x, incx, y, incy, 
dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "swap", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level2.cxx b/src/blas/backends/portblas/portblas_level2.cxx index 8d01deec2..e1bc4766a 100644 --- a/src/blas/backends/portblas/portblas_level2.cxx +++ b/src/blas/backends/portblas/portblas_level2.cxx @@ -19,452 +19,452 @@ // Buffer APIs -void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "gemv", " for complex"); } -void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer &a, - std::int64_t 
lda, sycl::buffer &x, std::int64_t incx, real_t beta, - sycl::buffer &y, std::int64_t incy) { +void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, +void gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "gbmv", " for complex"); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "gerc", ""); } -void geru(sycl::queue 
&queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "geru", ""); } -void hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hbmv", ""); } -void hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hemv", ""); } -void her(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "her", ""); } -void her2(sycl::queue &queue, oneapi::math::uplo upper_lower, 
std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "her2", ""); } -void hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hpmv", ""); } -void hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { throw unimplemented("blas", "hpr", ""); } -void hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { throw unimplemented("blas", "hpr2", ""); } -void sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void 
sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void symv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, real_t beta, - sycl::buffer &y, std::int64_t incy) { +void spmv(sycl::queue& queue, 
oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, real_t beta, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { CALL_PORTBLAS_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a); } -void spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { CALL_PORTBLAS_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tbmv", ""); } -void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tbsv", ""); } -void tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 
1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tpmv", ""); } -void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tpsv", ""); } -void trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, 
std::int64_t incx) { throw unimplemented("blas", "trmv", " for complex"); } -void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "trsv", ""); } // USM APIs -sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, const real_t *x, - std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, + std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - 
std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gemv", " for USM"); } -sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *x, std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "gbmv", " for USM"); } -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, std::int64_t 
lda, - const std::vector &dependencies) { +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, real_t alpha, const real_t* x, + std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, std::int64_t lda, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "gerc", " for USM"); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "geru", " for USM"); } -sycl::event hbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + 
std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hbmv", " for USM"); } -sycl::event hemv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hemv", " for USM"); } -sycl::event her(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her", " for USM"); } -sycl::event her2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her2", " for USM"); } -sycl::event hpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex 
beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hpmv", " for USM"); } -sycl::event hpr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { throw unimplemented("blas", "hpr", " for USM"); } -sycl::event hpr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { throw unimplemented("blas", "hpr2", " for USM"); } -sycl::event sbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - real_t alpha, const real_t *a, std::int64_t lda, const real_t *x, - std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, + std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_sbmv, queue, upper_lower, n, k, 
alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *a, std::int64_t lda, const real_t *x, std::int64_t incx, real_t beta, - real_t *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event symv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, + real_t* y, std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, real_t *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, real_t* a, std::int64_t lda, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, + std::int64_t lda, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event spmv(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *a, const real_t *x, std::int64_t incx, real_t beta, real_t *y, - std::int64_t incy, const std::vector 
&dependencies) { +sycl::event spmv(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* a, const real_t* x, std::int64_t incx, real_t beta, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, real_t *a, - const std::vector &dependencies) { +sycl::event spr(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, real_t* a, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, - const std::vector &dependencies) { +sycl::event spr2(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t *a, - std::int64_t lda, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, + std::int64_t lda, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, 
x, incx, dependencies); } -sycl::event tbmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +sycl::event tbmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "tbmv", " for USM"); } -sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t *a, - std::int64_t lda, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, + std::int64_t lda, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +sycl::event tbsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "tbsv", " for USM"); } -sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const real_t *a, real_t *x, - std::int64_t incx, const std::vector 
&dependencies) { +sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x, + std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpmv", " for USM"); } -sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const real_t *a, real_t *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, real_t* x, + std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", 
"tpsv", " for USM"); } -sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const real_t *a, std::int64_t lda, - real_t *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, + real_t* x, std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "trmv", " for USM"); } -sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const real_t *a, std::int64_t lda, - real_t *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, + real_t* x, std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - oneapi::math::diag unit_diag, std::int64_t n, const std::complex 
*a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + oneapi::math::diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "trsv", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level3.cxx b/src/blas/backends/portblas/portblas_level3.cxx index 9887d270d..d1aa32652 100644 --- a/src/blas/backends/portblas/portblas_level3.cxx +++ b/src/blas/backends/portblas/portblas_level3.cxx @@ -19,19 +19,19 @@ // Buffer APIs -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + sycl::buffer& c, std::int64_t ldc) { CALL_PORTBLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { using sycl_complex_real_t = sycl::ext::oneapi::experimental::complex; if (transa == 
oneapi::math::transpose::conjtrans || transb == oneapi::math::transpose::conjtrans) { @@ -63,184 +63,184 @@ void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::tran queue.copy(out_pb_acc, out_acc); } -void symm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + sycl::buffer& c, std::int64_t ldc) { CALL_PORTBLAS_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "symm", ""); } -void hemm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +void hemm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", 
"hemm", ""); } -void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, real_t beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, real_t beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "syrk", ""); } -void syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "syrk", ""); } -void herk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer, 1> &a, - std::int64_t lda, real_t beta, sycl::buffer, 1> &c, +void herk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer, 1>& a, + std::int64_t lda, real_t beta, sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "herk", ""); } -void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + 
sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "syr2k", ""); } -void syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "syr2k", ""); } -void her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +void her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, real_t beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, real_t beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "her2k", ""); } -void trmm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb) { +void trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, + std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { throw unimplemented("blas", "trmm", ""); } -void trmm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, 
std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { +void trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "trmm", ""); } -void trsm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb) { +void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, + std::int64_t n, real_t alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { CALL_PORTBLAS_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { +void trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "trsm", " for complex"); } -void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, +void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, 
std::int64_t n, std::int64_t k, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, real_t beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, real_t beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemmt", ""); } -void gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, +void gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "gemmt", ""); } -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { CALL_PORTBLAS_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "omatcopy", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, 
std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { CALL_PORTBLAS_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { throw unimplemented("blas", "imatcopy", ""); } -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { throw unimplemented("blas", "imatcopy", ""); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, real_t beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + real_t alpha, sycl::buffer& a, std::int64_t lda, real_t beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { 
CALL_PORTBLAS_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "omatadd", ""); } // USM APIs -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, - std::int64_t lda, const real_t *b, std::int64_t ldb, real_t beta, real_t *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, + std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, real_t* c, + std::int64_t ldc, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex 
beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { if (transa == oneapi::math::transpose::conjtrans || transb == oneapi::math::transpose::conjtrans) { throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS"); @@ -249,203 +249,204 @@ sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::mat c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *b, std::int64_t ldb, real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event symm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "symm", " for USM"); } -sycl::event hemm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event hemm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t 
m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "hemm", " for USM"); } -sycl::event syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, std::int64_t lda, - real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, + real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syrk", " for USM"); } -sycl::event syrk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +sycl::event syrk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syrk", " for USM"); } -sycl::event herk(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const std::complex *a, - std::int64_t lda, real_t beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, 
std::int64_t k, real_t alpha, const std::complex* a, + std::int64_t lda, real_t beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "herk", " for USM"); } -sycl::event syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *b, std::int64_t ldb, real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syr2k", " for USM"); } -sycl::event syr2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +sycl::event syr2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "syr2k", " for USM"); } -sycl::event her2k(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, +sycl::event her2k(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, real_t beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, 
const std::complex* b, + std::int64_t ldb, real_t beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "her2k", " for USM"); } -sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, + std::int64_t ldb, const std::vector& dependencies) { throw unimplemented("blas", "trmm", " for USM"); } -sycl::event trmm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event trmm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trmm", " for USM"); } -sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, + std::int64_t ldb, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trsm, queue, 
left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, +sycl::event trsm(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trsm", " for USM"); } -sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t n, std::int64_t k, real_t alpha, - const real_t *a, std::int64_t lda, const real_t *b, std::int64_t ldb, real_t beta, - real_t *c, std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, + std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, const real_t* b, + std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", " for USM"); } -sycl::event gemmt(sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, oneapi::math::uplo upper_lower, + oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, + std::int64_t k, std::complex 
alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", " for USM"); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, real_t *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, real_t* b, std::int64_t ldb, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy", "for USM"); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, std::int64_t stridea, - real_t *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, std::int64_t stridea, + real_t* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex 
alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", "for USM"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, real_t *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, real_t* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", ""); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", ""); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t beta, - const real_t *b, std::int64_t ldb, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t beta, + const real_t* b, std::int64_t ldb, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd, queue, transa, transb, m, n, 
alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd", ""); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - real_t *alpha, const real_t **a, int64_t *lda, real_t **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + real_t* alpha, const real_t** a, int64_t* lda, real_t** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", ""); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", ""); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - real_t *alpha, real_t **ab, int64_t *lda, int64_t *ldb, - int64_t 
group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + real_t* alpha, real_t** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", ""); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", ""); } diff --git a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp index d432647d6..7721f58c9 100644 --- a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp +++ b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp @@ -33,20 +33,20 @@ namespace portblas { namespace column_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for bfloat16"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const oneapi::math::bfloat16 *a, std::int64_t lda, const oneapi::math::bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } @@ -54,20 +54,20 @@ sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::mat namespace row_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for bfloat16"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const oneapi::math::bfloat16 *a, std::int64_t lda, const oneapi::math::bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level3_half.cpp 
b/src/blas/backends/portblas/portblas_level3_half.cpp index 65570e5d9..dbd71ab4b 100644 --- a/src/blas/backends/portblas/portblas_level3_half.cpp +++ b/src/blas/backends/portblas/portblas_level3_half.cpp @@ -33,66 +33,66 @@ namespace portblas { namespace column_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " half"); } -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for different argument data types"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const 
std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } } // namespace column_major namespace row_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " half"); } -void gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +void gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for different argument data types"); } // USM -sycl::event 
gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } -sycl::event gemm(sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } diff --git a/src/blas/backends/rocblas/rocblas_batch.cpp b/src/blas/backends/rocblas/rocblas_batch.cpp index 64338b2a3..b6e550724 100644 --- a/src/blas/backends/rocblas/rocblas_batch.cpp +++ b/src/blas/backends/rocblas/rocblas_batch.cpp @@ -28,7 +28,7 @@ // Helper Functions template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf, const int64_t len, const int64_t inc, const int64_t stride, const int64_t batch_size) { const auto abs_inc = std::abs(inc); const auto abs_stride = std::abs(stride); @@ -40,7 +40,7 @@ static inline void 
conj_vector(sycl::handler &cgh, sycl::buffer &buf, const i }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, const int64_t inc, +static inline void conj_vector(sycl::handler& cgh, T* ptr, const int64_t len, const int64_t inc, const int64_t stride, const int64_t batch_size) { const auto abs_inc = std::abs(inc); const auto abs_stride = std::abs(stride); @@ -52,7 +52,7 @@ static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, co } template -static inline void conj_vector(sycl::handler &cgh, T **ptr, const int64_t len, const int64_t inc, +static inline void conj_vector(sycl::handler& cgh, T** ptr, const int64_t len, const int64_t inc, const int64_t stride, const int64_t group_size) { const auto abs_inc = std::abs(inc); cgh.parallel_for(sycl::range{ (std::size_t)group_size, (std::size_t)len }, @@ -72,30 +72,30 @@ namespace column_major { // Buffer APIs template -inline void copy_batch(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void copy_batch(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, stridex, y_, incy, stridey, - batch_size); + batch_size); }); 
}); } #define COPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, \ + void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, \ int64_t batch_size) { \ copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, batch_size); \ } @@ -108,30 +108,30 @@ COPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zcopy_strided_batched) #undef COPY_STRIDED_BATCH_LAUNCHER template -inline void axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, stridex, - y_, incy, stridey, batch_size); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, stridex, y_, + incy, stridey, batch_size); }); }); } #define AXPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, 
int64_t stridex, sycl::buffer &y, int64_t incy, \ + void axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, \ int64_t stridey, int64_t batch_size) { \ axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size); \ @@ -145,36 +145,36 @@ AXPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zaxpy_strided_batched) #undef AXPY_BATCH_LAUNCHER template -inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, T beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, T beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, incx, incy, stridea, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, stridea, x_, incx, stridex, - (rocDataType *)&beta, y_, incy, stridey, batch_size); + (rocDataType*)&alpha, a_, lda, 
stridea, x_, incx, stridex, + (rocDataType*)&beta, y_, incy, stridey, batch_size); }); }); } #define GEMV_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, TYPE beta, \ - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { \ + void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, TYPE beta, \ + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { \ gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, \ beta, y, incy, stridey, batch_size); \ } @@ -187,35 +187,35 @@ GEMV_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgemv_strided_batched) #undef GEMV_STRIDED_BATCH_LAUNCHER template -inline void dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, +inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldc, incx, stridea, stridex, stridec, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); 
- auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; - rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, - lda, stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); + rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, lda, + stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); }); }); } #define DGMM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, \ ldc, stridec, batch_size); \ } @@ -228,10 +228,10 @@ DGMM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zdgmm_strided_batched) #undef DGMM_STRIDED_BATCH_LAUNCHER template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, + Ts beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocTypeA = typename RocEquivalentType::Type; 
using rocTypeB = typename RocEquivalentType::Type; @@ -241,35 +241,35 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(rocblas_gemm_strided_batched_ex, err, handle, - get_rocblas_operation(transa), get_rocblas_operation(transb), m, - n, k, &alpha, a_, get_rocblas_datatype(), lda, - stridea, b_, get_rocblas_datatype(), ldb, strideb, - &beta, c_, get_rocblas_datatype(), ldc, stridec, c_, - get_rocblas_datatype(), ldc, stridec, batch_size, - get_rocblas_datatype(), rocblas_gemm_algo_standard, - solution_index, flags); + get_rocblas_operation(transa), get_rocblas_operation(transb), m, n, + k, &alpha, a_, get_rocblas_datatype(), lda, stridea, b_, + get_rocblas_datatype(), ldb, strideb, &beta, c_, + get_rocblas_datatype(), ldc, stridec, c_, + get_rocblas_datatype(), ldc, stridec, batch_size, + get_rocblas_datatype(), rocblas_gemm_algo_standard, + solution_index, flags); }); }); } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void 
gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, \ beta, c, ldc, stridec, batch_size); \ @@ -287,10 +287,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -304,35 +304,34 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER template -inline void trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, 
strideb, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, stridea, b_, ldb, strideb, - batch_size); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, stridea, b_, ldb, strideb, batch_size); }); }); } #define TRSM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, int64_t stridea, sycl::buffer &b, int64_t ldb, \ + void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, \ int64_t strideb, int64_t batch_size) { \ trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, \ a, lda, stridea, b, ldb, strideb, batch_size); \ @@ -346,34 +345,33 @@ TRSM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_ztrsm_strided_batched) #undef TRSM_STRIDED_BATCH_LAUNCHER template -inline void syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - T beta, 
sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + T beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc, stridea, stridec, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, c_, ldc, stridec, - batch_size); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, c_, ldc, stridec, batch_size); }); }); } #define SYRK_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, TYPE beta, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, TYPE beta, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, stridea, beta, \ c, ldc, stridec, batch_size); \ } @@ -386,9 +384,9 @@ 
SYRK_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zsyrk_strided_batched) #undef SYRK_STRIDED_BATCH_LAUNCHER template -inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &b, int64_t ldb, int64_t strideb, +inline void omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); @@ -397,27 +395,27 @@ inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64 const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), - get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, stridea, (rocDataType *)&beta, - nullptr, lda, stridea, b_, ldb, strideb, batch_size); + get_rocblas_operation(trans), new_m, new_n, (rocDataType*)&alpha, + a_, lda, stridea, (rocDataType*)&beta, nullptr, lda, stridea, b_, + ldb, strideb, batch_size); }); }); } #define OMATCOPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const 
TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb, \ + void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb, \ int64_t batch_size) { \ omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size); \ @@ -430,63 +428,63 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batc #undef OMATCOPY_STRIDED_BATCH_LAUNCHER -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + 
std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } template -inline void omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, const T beta, sycl::buffer &b, int64_t ldb, - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, const T beta, sycl::buffer& b, int64_t ldb, + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc, stridea, strideb, stridec, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, b_, ldb, strideb, c_, ldc, - stridec, batch_size); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, b_, ldb, strideb, c_, ldc, stridec, + batch_size); }); }); } #define OMATADD_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void 
omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, const TYPE beta, sycl::buffer &b, int64_t ldb, \ - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, const TYPE beta, sycl::buffer& b, int64_t ldb, \ + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, beta, \ b, ldb, strideb, c, ldc, stridec, batch_size); \ @@ -502,26 +500,26 @@ OMATADD_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batch // USM APIs template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t *n, const T **x, int64_t *incx, - T **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t* n, const T** x, int64_t* incx, + T** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); rocblas_native_func(func, 
err, handle, (int)n[i], x_ + offset, (int)incx[i], - y_ + offset, (int)incy[i], (int)group_size[i]); + y_ + offset, (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -531,9 +529,9 @@ inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t *n, const T } #define COPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t *n, const TYPE **x, int64_t *incx, \ - TYPE **y, int64_t *incy, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t* n, const TYPE** x, int64_t* incx, \ + TYPE** y, int64_t* incy, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, group_count, group_size, \ dependencies); \ } @@ -546,22 +544,22 @@ COPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_batched) #undef COPY_BATCH_LAUNCHER_USM template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, - int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, + int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; 
rocblas_native_func(func, err, handle, n, x_, incx, stridex, y_, incy, stridey, - batch_size); + batch_size); }); }); @@ -569,9 +567,9 @@ inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T } #define COPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -584,27 +582,27 @@ COPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_strided_batc #undef COPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alpha, const T **x, - int64_t *incx, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t* n, T* alpha, const T** x, + int64_t* incx, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **x_ = reinterpret_cast(x); - auto 
**y_ = reinterpret_cast(y); - rocblas_native_func(func, err, handle, (int)n[i], (rocDataType *)&alpha[i], - x_ + offset, (int)incx[i], y_ + offset, (int)incy[i], - (int)group_size[i]); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); + rocblas_native_func(func, err, handle, (int)n[i], (rocDataType*)&alpha[i], + x_ + offset, (int)incx[i], y_ + offset, (int)incy[i], + (int)group_size[i]); offset += group_size[i]; } }); @@ -614,9 +612,9 @@ inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alph } #define AXPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t *n, TYPE *alpha, const TYPE **x, \ - int64_t *incx, TYPE **y, int64_t *incy, int64_t group_count, \ - int64_t *group_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t* n, TYPE* alpha, const TYPE** x, \ + int64_t* incx, TYPE** y, int64_t* incy, int64_t group_count, \ + int64_t* group_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, group_count, \ group_size, dependencies); \ } @@ -629,22 +627,22 @@ AXPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_batched) #undef AXPY_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, - int64_t incx, int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, + int64_t incx, int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, stridex, - y_, incy, stridey, batch_size); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, stridex, y_, + incy, stridey, batch_size); }); }); @@ -652,9 +650,9 @@ inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, } #define AXPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -667,26 +665,26 @@ AXPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_strided_batc #undef AXPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, int64_t stridea, const T *x, - int64_t incx, int64_t stridex, T beta, T *y, int64_t incy, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, int64_t stridea, const T* x, + int64_t incx, int64_t stridex, T beta, T* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using 
rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy, stridea, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, stridea, x_, incx, stridex, - (rocDataType *)&beta, y_, incy, stridey, batch_size); + (rocDataType*)&alpha, a_, lda, stridea, x_, incx, stridex, + (rocDataType*)&beta, y_, incy, stridey, batch_size); }); }); @@ -694,11 +692,11 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in } #define GEMV_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE beta, TYPE *y, int64_t incy, \ + sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE beta, TYPE* y, int64_t incy, \ int64_t stridey, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, \ stridex, beta, y, incy, stridey, batch_size, dependencies); \ } @@ -711,30 +709,30 @@ GEMV_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_strided_batc #undef 
GEMV_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, const T** x, + int64_t* incx, T* beta, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); - rocblas_native_func( - func, err, handle, get_rocblas_operation(trans[i]), (int)m[i], (int)n[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], x_ + offset, (int)incx[i], - (rocDataType *)&beta[i], y_ + offset, (int)incy[i], (int)group_size[i]); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); + rocblas_native_func(func, err, handle, get_rocblas_operation(trans[i]), (int)m[i], + (int)n[i], (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + x_ + offset, (int)incx[i], (rocDataType*)&beta[i], y_ + offset, + (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -745,9 +743,9 @@ inline sycl::event gemv_batch(Func func, 
sycl::queue &queue, transpose *trans, i #define GEMV_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ sycl::event gemv_batch( \ - sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, TYPE *alpha, const TYPE **a, \ - int64_t *lda, const TYPE **x, int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, const std::vector &dependencies) { \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy, group_count, group_size, dependencies); \ } @@ -760,24 +758,24 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_batched) #undef GEMV_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - const T *a, int64_t lda, int64_t stridea, const T *x, int64_t incx, - int64_t stridex, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + const T* a, int64_t lda, int64_t stridea, const T* x, int64_t incx, + int64_t stridex, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, incx, stridea, stridex, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto 
c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto c_ = reinterpret_cast(c); rocblas_status err; - rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, - lda, stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); + rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, lda, + stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); }); }); @@ -785,10 +783,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, in } #define DGMM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, \ stridex, c, ldc, stridec, batch_size, dependencies); \ } @@ -801,29 +799,29 @@ DGMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_strided_batc #undef DGMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, int64_t *m, - int64_t *n, const T **a, int64_t *lda, const T **x, int64_t *incx, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, int64_t* m, + int64_t* n, const T** a, int64_t* lda, const T** x, int64_t* incx, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& 
dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldc[i], incx[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** c_ = reinterpret_cast(c); rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right[i]), - (int)m[i], (int)n[i], a_ + offset, (int)lda[i], x_ + offset, - (int)incx[i], c_ + offset, (int)ldc[i], (int)group_size[i]); + (int)m[i], (int)n[i], a_ + offset, (int)lda[i], x_ + offset, + (int)incx[i], c_ + offset, (int)ldc[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -833,10 +831,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, i } #define DGMM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, \ - const TYPE **a, int64_t *lda, const TYPE **x, int64_t *incx, TYPE **c, \ - int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, \ + const TYPE** a, int64_t* lda, const TYPE** x, int64_t* incx, TYPE** c, \ + int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, x, incx, c, ldc, \ group_count, group_size, dependencies); \ } @@ -849,13 
+847,13 @@ DGMM_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_batched) #undef DGMM_BATCH_LAUNCHER template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stridea, - const Tb *b, int64_t ldb, int64_t strideb, Ts beta, - Tc *c, int64_t ldc, int64_t stridec, + Ts alpha, const Ta* a, int64_t lda, int64_t stridea, + const Tb* b, int64_t ldb, int64_t strideb, Ts beta, + Tc* c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocTypeA = typename RocEquivalentType::Type; using rocTypeB = typename RocEquivalentType::Type; using rocTypeC = typename RocEquivalentType::Type; @@ -864,23 +862,23 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(rocblas_gemm_strided_batched_ex, err, handle, - get_rocblas_operation(transa), get_rocblas_operation(transb), m, - n, k, &alpha, a_, get_rocblas_datatype(), lda, - stridea, b_, get_rocblas_datatype(), ldb, strideb, - &beta, c_, get_rocblas_datatype(), ldc, stridec, c_, - get_rocblas_datatype(), ldc, stridec, batch_size, - get_rocblas_datatype(), 
rocblas_gemm_algo_standard, - solution_index, flags); + get_rocblas_operation(transa), get_rocblas_operation(transb), m, n, + k, &alpha, a_, get_rocblas_datatype(), lda, stridea, b_, + get_rocblas_datatype(), ldb, strideb, &beta, c_, + get_rocblas_datatype(), ldc, stridec, c_, + get_rocblas_datatype(), ldc, stridec, batch_size, + get_rocblas_datatype(), rocblas_gemm_algo_standard, + solution_index, flags); }); }); @@ -888,11 +886,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, \ b, ldb, strideb, beta, c, ldc, stridec, batch_size, \ dependencies); \ @@ -910,11 +908,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector 
&dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -927,11 +925,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocTypeA = typename RocEquivalentType::Type; using rocTypeB = typename RocEquivalentType::Type; using rocTypeC = typename RocEquivalentType::Type; @@ -942,17 +940,17 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; 
rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); + auto** c_ = reinterpret_cast(c); rocblas_native_func( rocblas_gemm_batched_ex, err, handle, get_rocblas_operation(transa[i]), get_rocblas_operation(transb[i]), (int)m[i], (int)n[i], (int)k[i], &alpha[i], @@ -971,11 +969,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -992,11 +990,11 @@ GEMM_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* 
transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -1009,27 +1007,26 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - const T *a, int64_t lda, int64_t stridea, T *b, int64_t ldb, + const T* a, int64_t lda, int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, stridea, b_, ldb, strideb, - batch_size); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + 
get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, stridea, b_, ldb, strideb, batch_size); }); }); @@ -1037,10 +1034,10 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, up } #define TRSM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, int64_t strideb, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, int64_t strideb, \ + int64_t batch_size, const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); \ } @@ -1053,31 +1050,31 @@ TRSM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_strided_batc #undef TRSM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, T *alpha, - const T **a, int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, T* alpha, + const T** a, int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = 
queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right[i]), - get_rocblas_fill_mode(upper_lower[i]), - get_rocblas_operation(trans[i]), - get_rocblas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - b_ + offset, (int)ldb[i], (int)group_size[i]); + get_rocblas_fill_mode(upper_lower[i]), + get_rocblas_operation(trans[i]), + get_rocblas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, + (int)ldb[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -1087,11 +1084,11 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, u } #define TRSM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, 
lda, b, ldb, group_count, group_size, dependencies); \ } @@ -1104,30 +1101,30 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_batched) #undef TRSM_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, transpose *trans, - int64_t *n, int64_t *k, T *alpha, const T **a, int64_t *lda, T *beta, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower, transpose* trans, + int64_t* n, int64_t* k, T* alpha, const T** a, int64_t* lda, T* beta, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], k[i], lda[i], ldc[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** c_ = reinterpret_cast(c); rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower[i]), - get_rocblas_operation(trans[i]), (int)n[i], (int)k[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - (rocDataType *)&beta[i], c_ + offset, (int)ldc[i], - (int)group_size[i]); + get_rocblas_operation(trans[i]), (int)n[i], (int)k[i], + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + (rocDataType*)&beta[i], c_ + offset, (int)ldc[i], + (int)group_size[i]); offset += group_size[i]; } }); @@ -1137,10 +1134,10 @@ inline sycl::event syrk_batch(Func func, sycl::queue 
&queue, uplo *upper_lower, } #define SYRK_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, \ - int64_t *k, TYPE *alpha, const TYPE **a, int64_t *lda, TYPE *beta, \ - TYPE **c, int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, \ + int64_t* k, TYPE* alpha, const TYPE** a, int64_t* lda, TYPE* beta, \ + TYPE** c, int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, \ c, ldc, group_count, group_size, dependencies); \ } @@ -1153,25 +1150,24 @@ SYRK_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_batched) #undef SYRK_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc, stridea, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto 
a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, c_, ldc, stridec, - batch_size); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, c_, ldc, stridec, batch_size); }); }); @@ -1179,11 +1175,11 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, t } #define SYRK_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, \ - int64_t k, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, TYPE *c, int64_t ldc, \ + sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, \ + int64_t k, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, TYPE* c, int64_t ldc, \ int64_t stridec, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ stridea, beta, c, ldc, stridec, batch_size, dependencies); \ } @@ -1196,11 +1192,11 @@ SYRK_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_strided_batc #undef SYRK_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, T *b, int64_t ldb, int64_t strideb, +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; 
overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); @@ -1208,18 +1204,18 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), - get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, stridea, (rocDataType *)&beta, - nullptr, lda, stridea, b_, ldb, strideb, batch_size); + get_rocblas_operation(trans), new_m, new_n, (rocDataType*)&alpha, + a_, lda, stridea, (rocDataType*)&beta, nullptr, lda, stridea, b_, + ldb, strideb, batch_size); }); }); @@ -1227,10 +1223,10 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans } #define OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, int64_t stridea, \ - TYPE *b, int64_t ldb, int64_t strideb, int64_t batch_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, int64_t stridea, \ + TYPE* b, int64_t ldb, int64_t strideb, int64_t batch_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ 
strideb, batch_size, dependencies); \ } @@ -1242,54 +1238,54 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_ #undef OMATCOPY_STRIDED_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const 
std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } template -inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, const T *b, int64_t ldb, - int64_t strideb, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, const T* b, int64_t ldb, + int64_t strideb, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc, stridea, strideb, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, b_, ldb, strideb, c_, ldc, - stridec, batch_size); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, b_, ldb, strideb, c_, ldc, stridec, + batch_size); }); }); @@ -1297,11 +1293,11 @@ inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa } #define 
OMATADD_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, const TYPE *b, int64_t ldb, \ - int64_t strideb, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, const TYPE* b, int64_t ldb, \ + int64_t strideb, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, \ beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); \ } @@ -1314,35 +1310,35 @@ OMATADD_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_b #undef OMATADD_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, T **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, T** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); const T beta = 0; const auto new_m = trans[i] == oneapi::math::transpose::nontrans ? m[i] : n[i]; const auto new_n = trans[i] == oneapi::math::transpose::nontrans ? n[i] : m[i]; rocblas_native_func(func, err, handle, get_rocblas_operation(trans[i]), - get_rocblas_operation(trans[i]), (int)new_m, (int)new_n, - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - (rocDataType *)&beta, nullptr, (int)lda[i], b_ + offset, - (int)ldb[i], (int)group_size[i]); + get_rocblas_operation(trans[i]), (int)new_m, (int)new_n, + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + (rocDataType*)&beta, nullptr, (int)lda[i], b_ + offset, + (int)ldb[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -1352,10 +1348,10 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *tran } #define OMATCOPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, \ + TYPE* alpha, const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, \ group_count, group_size, dependencies); \ } @@ -1367,31 +1363,31 @@ OMATCOPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_batched) #undef OMATCOPY_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t 
*n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const 
std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } @@ -1402,15 +1398,15 @@ namespace row_major { // Buffer APIs template -inline void copy_batch(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void copy_batch(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { column_major::copy_batch(func, queue, n, x, incx, stridex, y, incy, stridey, batch_size); } #define COPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, \ + void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, \ int64_t batch_size) { \ copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, batch_size); \ } @@ -1423,15 +1419,15 @@ COPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zcopy_strided_batched) #undef COPY_STRIDED_BATCH_LAUNCHER template -inline void axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { column_major::axpy_batch(func, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } #define AXPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, \ + void axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, int64_t stridex, 
sycl::buffer& y, int64_t incy, \ int64_t stridey, int64_t batch_size) { \ axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size); \ @@ -1445,13 +1441,13 @@ AXPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zaxpy_strided_batched) #undef AXPY_STRIDED_BATCH_LAUNCHER template -inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); @@ -1459,11 +1455,11 @@ inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m if (m > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx, stridex, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx, stridex, batch_size); }); if (n > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } @@ -1474,28 +1470,28 @@ inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } 
template -inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, T beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, T beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } #define GEMV_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, TYPE beta, \ - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { \ + void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, TYPE beta, \ + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { \ gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, \ beta, y, incy, stridey, batch_size); \ } @@ -1508,22 +1504,22 @@ GEMV_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgemv_strided_batched) #undef GEMV_STRIDED_BATCH_LAUNCHER template -inline void dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, 
+inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; column_major::dgmm_batch(func, queue, new_side, n, m, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } #define DGMM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, \ ldc, stridec, batch_size); \ } @@ -1536,10 +1532,10 @@ DGMM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zdgmm_strided_batched) #undef DGMM_STRIDED_BATCH_LAUNCHER template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, + Ts beta, sycl::buffer& c, int64_t ldc, 
int64_t stridec, int64_t batch_size) { auto new_transa = transb; auto new_transb = transa; @@ -1550,10 +1546,10 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, \ beta, c, ldc, stridec, batch_size); \ @@ -1571,10 +1567,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -1588,23 +1584,23 @@ 
GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER template -inline void trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::trsm_batch(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size); } #define TRSM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, int64_t stridea, sycl::buffer &b, int64_t ldb, \ + void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, \ int64_t strideb, int64_t batch_size) { \ trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, \ a, lda, stridea, b, ldb, strideb, batch_size); \ @@ -1618,23 +1614,23 @@ TRSM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_ztrsm_strided_batched) #undef TRSM_STRIDED_BATCH_LAUNCHER template -inline void syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t 
n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - T beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + T beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::syrk_batch(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, stridea, beta, c, ldc, stridec, batch_size); } #define SYRK_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, TYPE beta, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, TYPE beta, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, stridea, beta, \ c, ldc, stridec, batch_size); \ } @@ -1647,18 +1643,18 @@ SYRK_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zsyrk_strided_batched) #undef SYRK_STRIDED_BATCH_LAUNCHER template -inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &b, int64_t ldb, int64_t strideb, +inline void omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, 
int64_t stridea, + sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size); } #define OMATCOPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb, \ + void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb, \ int64_t batch_size) { \ omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size); \ @@ -1671,45 +1667,45 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batc #undef OMATCOPY_STRIDED_BATCH_LAUNCHER -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 
1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } template -inline void omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, const T beta, sycl::buffer &b, int64_t ldb, - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, const T beta, sycl::buffer& b, int64_t ldb, + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { return column_major::omatadd_batch(func, queue, transa, transb, n, m, alpha, a, lda, stridea, beta, b, ldb, strideb, c, ldc, stridec, batch_size); } #define OMATADD_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, const TYPE beta, sycl::buffer &b, int64_t ldb, \ - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, const TYPE beta, sycl::buffer& b, int64_t ldb, \ + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ omatadd_batch(ROCBLAS_ROUTINE, queue, 
transa, transb, m, n, alpha, a, lda, stridea, beta, \ b, ldb, strideb, c, ldc, stridec, batch_size); \ @@ -1725,17 +1721,17 @@ OMATADD_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batch // USM APIs template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t *n, const T **x, int64_t *incx, - T **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t* n, const T** x, int64_t* incx, + T** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return column_major::copy_batch(func, queue, n, x, incx, y, incy, group_count, group_size, dependencies); } #define COPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t *n, const TYPE **x, int64_t *incx, \ - TYPE **y, int64_t *incy, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t* n, const TYPE** x, int64_t* incx, \ + TYPE** y, int64_t* incy, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, group_count, group_size, \ dependencies); \ } @@ -1748,17 +1744,17 @@ COPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_batched) #undef COPY_BATCH_LAUNCHER_USM template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, - int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, + int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { return column_major::copy_batch(func, queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } #define 
COPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -1771,17 +1767,17 @@ COPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_strided_batc #undef COPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alpha, const T **x, - int64_t *incx, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t* n, T* alpha, const T** x, + int64_t* incx, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { return column_major::axpy_batch(func, queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } #define AXPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t *n, TYPE *alpha, const TYPE **x, \ - int64_t *incx, TYPE **y, int64_t *incy, int64_t group_count, \ - int64_t *group_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t* n, TYPE* alpha, const TYPE** x, \ + int64_t* incx, TYPE** y, int64_t* incy, int64_t group_count, \ + int64_t* group_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, group_count, \ group_size, dependencies); \ } @@ -1794,17 +1790,17 @@ AXPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_batched) #undef AXPY_BATCH_LAUNCHER_USM template 
-inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, - int64_t incx, int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, + int64_t incx, int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { return column_major::axpy_batch(func, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } #define AXPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -1817,29 +1813,29 @@ AXPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_strided_batc #undef AXPY_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { sycl::event 
done; auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); beta = std::conj(beta); if (m > 0) { - done = queue.submit([&](sycl::handler &cgh) { - conj_vector(cgh, (std::complex *)x, m, incx, stridex, batch_size); + done = queue.submit([&](sycl::handler& cgh) { + conj_vector(cgh, (std::complex*)x, m, incx, stridex, batch_size); }); if (n > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } @@ -1851,7 +1847,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy, stridey, batch_size); }); @@ -1862,24 +1858,24 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in } template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, int64_t stridea, const T *x, - int64_t incx, int64_t stridex, T beta, T *y, int64_t incy, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, int64_t stridea, const T* x, + int64_t incx, int64_t stridex, T beta, T* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::gemv_batch(func, queue, new_trans, n, m, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } #define GEMV_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE beta, TYPE *y, int64_t incy, \ + sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE beta, TYPE* y, int64_t incy, \ int64_t stridey, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, \ stridex, beta, y, incy, stridey, batch_size, dependencies); \ } @@ -1892,12 +1888,12 @@ GEMV_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_strided_batc #undef GEMV_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { sycl::event done; int64_t stride = 0; @@ -1907,12 +1903,12 @@ inline sycl::event gemv_batch(Func func, 
sycl::queue &queue, transpose *trans, i beta[i] = std::conj(beta[i]); if (m[i] > 0) { - done = queue.submit([&](sycl::handler &cgh) { - conj_vector(cgh, (std::complex **)x, m[i], incx[i], stride, group_size[i]); + done = queue.submit([&](sycl::handler& cgh) { + conj_vector(cgh, (std::complex**)x, m[i], incx[i], stride, group_size[i]); }); if (n[i] > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n[i], incy[i], stride, group_size[i]); }); } @@ -1942,7 +1938,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i for (int64_t i = 0; i < group_count; i++) { if (trans[i] == oneapi::math::transpose::conjtrans) { if (n[i] > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n[i], incy[i], stride, group_size[i]); }); } @@ -1954,10 +1950,10 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i } template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, const T** x, + int64_t* incx, T* beta, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto tmp_trans = std::vector{ static_cast(group_count) }; for (int64_t i = 0; i < group_count; i++) { @@ -1979,9 +1975,9 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i #define GEMV_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ sycl::event gemv_batch( \ - sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, TYPE *alpha, const TYPE **a, \ - int64_t *lda, const TYPE **x, 
int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, const std::vector &dependencies) { \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy, group_count, group_size, dependencies); \ } @@ -1994,22 +1990,22 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_batched) #undef GEMV_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - const T *a, int64_t lda, int64_t stridea, const T *x, int64_t incx, - int64_t stridex, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + const T* a, int64_t lda, int64_t stridea, const T* x, int64_t incx, + int64_t stridex, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? 
oneapi::math::side::right + : oneapi::math::side::left; return column_major::dgmm_batch(func, queue, new_side, n, m, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } #define DGMM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, \ stridex, c, ldc, stridec, batch_size, dependencies); \ } @@ -2022,13 +2018,13 @@ DGMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_strided_batc #undef DGMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, int64_t *m, - int64_t *n, const T **a, int64_t *lda, const T **x, int64_t *incx, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, int64_t* m, + int64_t* n, const T** a, int64_t* lda, const T** x, int64_t* incx, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { const auto new_side = left_right[i] == oneapi::math::side::left ? 
oneapi::math::side::right - : oneapi::math::side::left; + : oneapi::math::side::left; left_right[i] = new_side; } @@ -2037,10 +2033,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, i } #define DGMM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, \ - const TYPE **a, int64_t *lda, const TYPE **x, int64_t *incx, TYPE **c, \ - int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, \ + const TYPE** a, int64_t* lda, const TYPE** x, int64_t* incx, TYPE** c, \ + int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, x, incx, c, ldc, \ group_count, group_size, dependencies); \ } @@ -2053,13 +2049,13 @@ DGMM_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_batched) #undef DGMM_BATCH_LAUNCHER template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stridea, - const Tb *b, int64_t ldb, int64_t strideb, Ts beta, - Tc *c, int64_t ldc, int64_t stridec, + Ts alpha, const Ta* a, int64_t lda, int64_t stridea, + const Tb* b, int64_t ldb, int64_t strideb, Ts beta, + Tc* c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -2069,11 +2065,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - 
int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, \ b, ldb, strideb, beta, c, ldc, stridec, batch_size, \ dependencies); \ @@ -2091,11 +2087,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -2108,11 +2104,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event 
gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { std::swap(transa[i], transb[i]); } @@ -2122,11 +2118,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -2143,11 +2139,11 @@ GEMM_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, 
const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -2160,25 +2156,25 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - const T *a, int64_t lda, int64_t stridea, T *b, int64_t ldb, + const T* a, int64_t lda, int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; + const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::trsm_batch(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); } #define TRSM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, int64_t strideb, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, int64_t strideb, \ + int64_t batch_size, const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); \ } @@ -2191,17 +2187,18 @@ TRSM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_strided_batc #undef TRSM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, T *alpha, - const T **a, int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, T* alpha, + const T** a, int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { const auto new_side = left_right[i] == oneapi::math::side::left ? 
oneapi::math::side::right - : oneapi::math::side::left; + : oneapi::math::side::left; left_right[i] = new_side; - const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower + ? oneapi::math::uplo::upper + : oneapi::math::uplo::lower; upper_lower[i] = new_uplo; } @@ -2210,11 +2207,11 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, u } #define TRSM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, group_count, group_size, dependencies); \ } @@ -2227,13 +2224,14 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_batched) #undef TRSM_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, transpose *trans, - int64_t *n, int64_t *k, T *alpha, const T **a, int64_t *lda, T *beta, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower, transpose* trans, + int64_t* n, int64_t* k, T* alpha, const T** a, int64_t* lda, T* beta, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { 
for (int64_t i = 0; i < group_count; i++) { - const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + const auto new_uplo = upper_lower[i] == oneapi::math::uplo::lower + ? oneapi::math::uplo::upper + : oneapi::math::uplo::lower; upper_lower[i] = new_uplo; const auto new_trans = trans[i] == oneapi::math::transpose::nontrans @@ -2247,10 +2245,10 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, } #define SYRK_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, \ - int64_t *k, TYPE *alpha, const TYPE **a, int64_t *lda, TYPE *beta, \ - TYPE **c, int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, \ + int64_t* k, TYPE* alpha, const TYPE** a, int64_t* lda, TYPE* beta, \ + TYPE** c, int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, \ c, ldc, group_count, group_size, dependencies); \ } @@ -2263,25 +2261,25 @@ SYRK_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_batched) #undef SYRK_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto new_uplo = upper_lower == 
oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::syrk_batch(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, stridea, beta, c, ldc, stridec, batch_size, dependencies); } #define SYRK_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, \ - int64_t k, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, TYPE *c, int64_t ldc, \ + sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, \ + int64_t k, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, TYPE* c, int64_t ldc, \ int64_t stridec, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ stridea, beta, c, ldc, stridec, batch_size, dependencies); \ } @@ -2294,20 +2292,20 @@ SYRK_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_strided_batc #undef SYRK_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, T *b, int64_t ldb, int64_t strideb, +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); } #define 
OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, int64_t stridea, \ - TYPE *b, int64_t ldb, int64_t strideb, int64_t batch_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, int64_t stridea, \ + TYPE* b, int64_t ldb, int64_t strideb, int64_t batch_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size, dependencies); \ } @@ -2319,49 +2317,49 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_ #undef OMATCOPY_STRIDED_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, 
+sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } template -inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, const T *b, int64_t ldb, - int64_t strideb, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, const T* b, int64_t ldb, + int64_t strideb, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { return column_major::omatadd_batch(func, queue, transa, transb, n, m, alpha, a, lda, stridea, beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); } #define OMATADD_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, const TYPE *b, int64_t ldb, \ - int64_t strideb, TYPE *c, int64_t ldc, int64_t 
stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, const TYPE* b, int64_t ldb, \ + int64_t strideb, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, \ beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); \ } @@ -2374,19 +2372,19 @@ OMATADD_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_b #undef OMATADD_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, T **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, T** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } #define OMATCOPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, \ + TYPE* alpha, const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, \ 
group_count, group_size, dependencies); \ } @@ -2398,31 +2396,31 @@ OMATCOPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_batched) #undef OMATCOPY_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const 
std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } diff --git a/src/blas/backends/rocblas/rocblas_extensions.cpp b/src/blas/backends/rocblas/rocblas_extensions.cpp index a0b7d56e8..b649ed1ef 100644 --- a/src/blas/backends/rocblas/rocblas_extensions.cpp +++ b/src/blas/backends/rocblas/rocblas_extensions.cpp @@ -33,65 +33,65 @@ namespace column_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, 
transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo 
upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +inline void omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); @@ -100,26 +100,26 @@ inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, const int64_t new_m = trans == 
oneapi::math::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(trans), get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, (rocDataType *)&beta, nullptr, + (rocDataType*)&alpha, a_, lda, (rocDataType*)&beta, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -131,16 +131,16 @@ OMATCOPY_LAUNCHER(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - 
void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, \ b, ldb, strideb); \ } @@ -152,55 +152,55 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major 
layout"); } template -inline void omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, const T alpha, sycl::buffer &a, int64_t lda, const T beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +inline void omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, const T alpha, sycl::buffer& a, int64_t lda, const T beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, b_, ldb, c_, ldc); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, + lda, (rocDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, const TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, const TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t 
ldc) { \ omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, \ ldc); \ } @@ -214,72 +214,72 @@ OMATADD_LAUNCHER(std::complex, rocblas_zgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose 
transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, 
transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using 
rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); @@ -287,17 +287,17 @@ inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int6 const int64_t new_m = trans == oneapi::math::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::math::transpose::nontrans ? n : m; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(trans), get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, (rocDataType *)&beta, nullptr, + (rocDataType*)&alpha, a_, lda, (rocDataType*)&beta, nullptr, lda, b_, ldb); }); }); @@ -306,9 +306,9 @@ inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int6 } #define OMATCOPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -320,16 +320,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t 
stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -341,50 +341,50 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, 
int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - const T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + const T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; 
ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, b_, ldb, c_, ldc); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, + lda, (rocDataType*)&beta, b_, ldb, c_, ldc); }); }); @@ -392,10 +392,10 @@ inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, tran } #define OMATADD_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, const TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, const TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, \ c, ldc, dependencies); \ } @@ -413,72 +413,72 @@ namespace row_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer 
&b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose 
transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t 
n, - const T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +inline void omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { column_major::omatcopy(func, queue, trans, n, m, alpha, a, lda, b, ldb); } #define OMATCOPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -490,16 +490,16 @@ OMATCOPY_LAUNCHER(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, \ b, ldb, strideb); \ } @@ -511,37 +511,37 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef 
OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline void omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, const T alpha, sycl::buffer &a, int64_t lda, const T beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +inline void omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, const T alpha, sycl::buffer& a, int64_t lda, const T beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { column_major::omatadd(func, queue, transa, transb, n, m, alpha, a, lda, beta, b, ldb, c, ldc); } #define 
OMATADD_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, const TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, const TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { \ omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, \ ldc); \ } @@ -555,79 +555,79 @@ OMATADD_LAUNCHER(std::complex, rocblas_zgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* 
co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, 
float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw 
unimplemented("blas", "gemmt", "for row_major layout"); } template -inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { return column_major::omatcopy(func, queue, trans, n, m, alpha, a, lda, b, ldb, dependencies); } #define OMATCOPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -639,16 +639,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const 
std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -660,44 +660,44 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t 
m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - const T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + const T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { return column_major::omatadd(func, queue, transa, transb, n, m, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } #define OMATADD_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, const TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, const TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, \ c, ldc, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_handle.hpp b/src/blas/backends/rocblas/rocblas_handle.hpp index 66d348619..660be6371 100644 --- a/src/blas/backends/rocblas/rocblas_handle.hpp +++ b/src/blas/backends/rocblas/rocblas_handle.hpp @@ -30,10 +30,10 @@ namespace rocblas { template struct rocblas_handle_ { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_mapper_{}; 
~rocblas_handle_() noexcept(false) { - for (auto &handle_pair : rocblas_handle_mapper_) { + for (auto& handle_pair : rocblas_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/blas/backends/rocblas/rocblas_helper.hpp b/src/blas/backends/rocblas/rocblas_helper.hpp index f21d63655..5f9b03276 100644 --- a/src/blas/backends/rocblas/rocblas_helper.hpp +++ b/src/blas/backends/rocblas/rocblas_helper.hpp @@ -77,7 +77,7 @@ void overflow_check(Index index, Next... indices) { class rocblas_error : virtual public std::runtime_error { protected: - inline const char *rocblas_error_map(rocblas_status error) { + inline const char* rocblas_error_map(rocblas_status error) { switch (error) { case rocblas_status_success: return "rocblas_status_success"; case rocblas_status_invalid_handle: return "rocblas_status_invalid_handle"; @@ -124,7 +124,7 @@ class rocblas_error : virtual public std::runtime_error { class hip_error : virtual public std::runtime_error { protected: - inline const char *hip_error_map(hipError_t result) { + inline const char* hip_error_map(hipError_t result) { return hipGetErrorName(result); } int error_number; ///< error number @@ -174,12 +174,12 @@ class hip_error : virtual public std::runtime_error { HIP_ERROR_FUNC(hipStreamSynchronize, hip_err, currentStreamId); template -inline void rocblas_native_func(Func func, rocblas_status err, - rocblas_handle handle, Types... args) { +inline void rocblas_native_func(Func func, rocblas_status err, rocblas_handle handle, + Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - ROCBLAS_ERROR_FUNC(func, err, handle, args...) + ROCBLAS_ERROR_FUNC(func, err, handle, args...) #else - ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) + ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) 
#endif }; diff --git a/src/blas/backends/rocblas/rocblas_level1.cpp b/src/blas/backends/rocblas/rocblas_level1.cpp index b5966108d..31c96ea82 100644 --- a/src/blas/backends/rocblas/rocblas_level1.cpp +++ b/src/blas/backends/rocblas/rocblas_level1.cpp @@ -34,16 +34,16 @@ namespace column_major { // Buffer APIs template -inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void asum(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on buffer, it must be set to @@ -51,8 +51,8 @@ inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; // ASUM does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -65,8 +65,8 @@ inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define ASUM_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -78,26 +78,26 @@ ASUM_LAUNCHER(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER template -inline void scal(Func func, sycl::queue &queue, int64_t n, T1 a, sycl::buffer &x, +inline void scal(Func func, sycl::queue& queue, int64_t n, T1 a, sycl::buffer& x, int64_t incx) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; // SCAL does not support negative incx - rocblas_native_func(func, err, handle, n, (rocDataType1 *)&a, x_, std::abs(incx)); + rocblas_native_func(func, err, handle, n, (rocDataType1*)&a, x_, std::abs(incx)); }); }); } #define SCAL_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, 
int64_t incx) { \ scal(ROCBLAS_ROUTINE, queue, n, a, x, incx); \ } @@ -111,29 +111,28 @@ SCAL_LAUNCHER(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER template -inline void axpy(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, y_, - incy); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, y_, incy); }); }); } #define AXPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -144,40 +143,40 @@ AXPY_LAUNCHER(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } 
-void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +inline void rotg(Func func, sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto s_acc = s.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on 
buffer, it must be set to @@ -185,10 +184,10 @@ inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::bu // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); - auto s_ = sc.get_mem(s_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); + auto s_ = sc.get_mem(s_acc); rocblas_status err; rocblas_native_func(func, err, handle, a_, b_, c_, s_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -200,8 +199,8 @@ inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::bu } #define ROTG_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(ROCBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -213,16 +212,16 @@ ROTG_LAUNCHER(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER template -inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +inline void rotm(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // 
By default the pointer mode is the rocblas_pointer_mode_host @@ -231,9 +230,9 @@ inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto param_ = sc.get_mem(param_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto param_ = sc.get_mem(param_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, param_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -245,8 +244,8 @@ inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define ROTM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -256,19 +255,19 @@ ROTM_LAUNCHER(double, rocblas_drotm) #undef ROTM_LAUNCHER template -inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void copy(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = 
sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -276,8 +275,8 @@ inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define COPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -289,16 +288,16 @@ COPY_LAUNCHER(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER template -inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +inline void dot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -307,9 +306,9 @@ inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -321,8 +320,8 @@ inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, } #define DOT_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } @@ -335,23 +334,23 @@ DOT_LAUNCHER(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &y, int64_t incy, T2 c, T3 s) { +inline void rot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; using rocDataType3 = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on buffer, it must be set to @@ -359,18 +358,18 @@ inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. // rocblas_set_pointer_mode(handle, rocblas_set_pointer_mode); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2 *)&c, - (rocDataType3 *)&s); + rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2*)&c, + (rocDataType3*)&s); }); }); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -381,16 +380,16 @@ ROT_LAUNCHER(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { overflow_check(n, incx, incy); // rocBLAS does not support sdot so we need to mimic sdot. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.get_access(cgh); auto y_acc = y.get_access(cgh); auto res_acc = result.get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -399,9 +398,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; rocblas_native_func(rocblas_sdot, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -417,18 +416,18 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, } template -inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(Func func, sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, T y1, sycl::buffer& param) { using rocDataType = typename RocEquivalentType::Type; sycl::buffer y1_buff(&y1, sycl::range<1>(1)); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto d1_acc = d1.template get_access(cgh); auto d2_acc = d2.template get_access(cgh); auto x1_acc = x1.template get_access(cgh); auto y1_acc = y1_buff.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -437,11 +436,11 @@ inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::b // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto d1_ = sc.get_mem(d1_acc); - auto d2_ = sc.get_mem(d2_acc); - auto x1_ = sc.get_mem(x1_acc); - auto y1_ = sc.get_mem(y1_acc); - auto param_ = sc.get_mem(param_acc); + auto d1_ = sc.get_mem(d1_acc); + auto d2_ = sc.get_mem(d2_acc); + auto x1_ = sc.get_mem(x1_acc); + auto y1_ = sc.get_mem(y1_acc); + auto param_ = sc.get_mem(param_acc); rocblas_status err; rocblas_native_func(func, err, handle, d1_, d2_, x1_, y1_, param_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -453,8 +452,8 @@ inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::b } #define ROTMG_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -464,8 +463,8 @@ ROTMG_LAUNCHER(double, rocblas_drotmg) #undef ROTMG_LAUNCHER template -inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamax(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); @@ -477,10 +476,10 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with 
different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -489,8 +488,8 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); rocblas_status err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference netlib BLAS. 
@@ -502,7 +501,7 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -511,8 +510,8 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define IAMAX_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -524,19 +523,19 @@ IAMAX_LAUNCHER(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER template -inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void swap(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -544,8 +543,8 @@ inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define SWAP_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + 
void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -557,8 +556,8 @@ SWAP_LAUNCHER(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER template -inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamin(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); @@ -570,10 +569,10 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -582,8 +581,8 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); rocblas_status err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented as a reference IAMIN. 
@@ -595,7 +594,7 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -604,8 +603,8 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define IAMIN_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -617,16 +616,16 @@ IAMIN_LAUNCHER(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER template -inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void nrm2(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -635,8 +634,8 @@ inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; // NRM2 does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -649,8 +648,8 @@ inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define NRM2_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -664,20 +663,20 @@ NRM2_LAUNCHER(std::complex, double, rocblas_dznrm2) // USM APIs template -inline sycl::event asum(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event asum(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); rocblas_status err; // ASUM does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -689,8 +688,8 @@ inline sycl::event asum(Func func, 
sycl::queue &queue, int64_t n, const T1 *x, c } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -702,21 +701,21 @@ ASUM_LAUNCHER_USM(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event scal(Func func, sycl::queue& queue, int64_t n, T1 a, T2* x, int64_t incx, + const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); rocblas_status err; // SCAL does not support negative incx - rocblas_native_func(func, err, handle, n, (rocDataType1 *)&a, x_, std::abs(incx)); + rocblas_native_func(func, err, handle, n, (rocDataType1*)&a, x_, std::abs(incx)); }); }); @@ -724,8 +723,8 @@ inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, i } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& 
dependencies) { \ return scal(ROCBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } @@ -739,21 +738,20 @@ SCAL_LAUNCHER_USM(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, int64_t incx, - T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event axpy(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, int64_t incx, + T* y, int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, y_, - incy); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, y_, incy); }); }); @@ -761,8 +759,8 @@ inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const } #define AXPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, dependencies); \ } @@ -773,44 +771,44 @@ AXPY_LAUNCHER_USM(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t 
incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 *s, - const std::vector &dependencies) { +inline sycl::event rotg(Func func, sycl::queue& queue, T1* a, T1* b, T2* c, T1* s, + const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename 
RocEquivalentType::Type; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); - auto s_ = reinterpret_cast(s); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); rocblas_status err; rocblas_native_func(func, err, handle, a_, b_, c_, s_); }); @@ -820,8 +818,8 @@ inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 * } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(ROCBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -833,19 +831,19 @@ ROTG_LAUNCHER_USM(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, T *param, const std::vector &dependencies) { +inline sycl::event rotm(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, T* param, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto param_ = reinterpret_cast(param); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto param_ = reinterpret_cast(param); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, param_); }); @@ -855,8 +853,8 @@ inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t } #define ROTM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param, dependencies); \ } @@ -866,18 +864,18 @@ ROTM_LAUNCHER_USM(double, rocblas_drotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event copy(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -887,8 +885,8 @@ inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, in 
} #define COPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -900,20 +898,20 @@ COPY_LAUNCHER_USM(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + const T* y, int64_t incy, T* result, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, res_); }); @@ -923,9 +921,9 @@ inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, con } #define DOT_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, 
const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result, dependencies); \ } @@ -938,29 +936,29 @@ DOT_LAUNCHER_USM(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER_USM -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 *x, const int64_t incx, T1 *y, - int64_t incy, T2 c, T3 s, const std::vector &dependencies) { +inline sycl::event rot(Func func, sycl::queue& queue, int64_t n, T1* x, const int64_t incx, T1* y, + int64_t incy, T2 c, T3 s, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; using rocDataType3 = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2 *)&c, - (rocDataType3 *)&s); + rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2*)&c, + (rocDataType3*)&s); }); }); @@ -968,9 +966,9 @@ inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 
*x, const in } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, dependencies); \ } @@ -981,20 +979,20 @@ ROT_LAUNCHER_USM(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { overflow_check(n, incx, incy); // rocBLAS does not support sdot so we need to mimic sdot. 
- auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); rocblas_status err; rocblas_native_func(rocblas_sdot, err, handle, n, x_, incx, y_, incy, res_); }); @@ -1006,20 +1004,20 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 } template -inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y1, T *param, - const std::vector &dependencies) { +inline sycl::event rotmg(Func func, sycl::queue& queue, T* d1, T* d2, T* x1, T y1, T* param, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto d1_ = reinterpret_cast(d1); - auto d2_ = reinterpret_cast(d2); - auto x1_ = reinterpret_cast(x1); - auto y1_ = reinterpret_cast(&y1); - auto param_ = reinterpret_cast(param); + auto d1_ = reinterpret_cast(d1); + auto d2_ = reinterpret_cast(d2); + auto x1_ = reinterpret_cast(x1); + auto y1_ = reinterpret_cast(&y1); + auto param_ = reinterpret_cast(param); rocblas_status err; rocblas_native_func(func, err, handle, d1_, d2_, x1_, y1_, param_); }); @@ -1029,8 +1027,8 @@ inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y } #define ROTMG_LAUNCHER_USM(TYPE, 
ROCBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1040,8 +1038,8 @@ ROTMG_LAUNCHER_USM(double, rocblas_drotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamax(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); // rocBLAS does not support int64_t as return type for the data by default. So we need to @@ -1049,17 +1047,17 @@ inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, c // it back to the actual data on the host. // This change may cause failure as the result of integer overflow // based on the size. 
- auto int_res_p = (int *)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), - queue.get_context()); + auto int_res_p = (int*)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), + queue.get_context()); *int_res_p = 0; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto int_res_p_ = reinterpret_cast(int_res_p); + auto x_ = reinterpret_cast(x); + auto int_res_p_ = reinterpret_cast(int_res_p); rocblas_status err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference iamax. @@ -1074,8 +1072,8 @@ inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, c } #define IAMAX_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1087,18 +1085,18 @@ IAMAX_LAUNCHER_USM(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event swap(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done 
= queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -1108,8 +1106,8 @@ inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t } #define SWAP_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1121,8 +1119,8 @@ SWAP_LAUNCHER_USM(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamin(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); // rocBLAS does not support int64_t as return type for the data by default. So we need to @@ -1130,18 +1128,18 @@ inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, c // it back to the actual data on the host. // This change may cause failure as the result of integer overflow // based on the size. 
- auto int_res_p = (int *)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), - queue.get_context()); + auto int_res_p = (int*)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), + queue.get_context()); *int_res_p = 0; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto int_res_p_ = reinterpret_cast(int_res_p); + auto x_ = reinterpret_cast(x); + auto int_res_p_ = reinterpret_cast(int_res_p); rocblas_status err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented iamin. @@ -1156,8 +1154,8 @@ inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, c } #define IAMIN_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1169,20 +1167,20 @@ IAMIN_LAUNCHER_USM(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event nrm2(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - 
auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); rocblas_status err; // NRM2 does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -1194,8 +1192,8 @@ inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, c } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1212,14 +1210,14 @@ namespace row_major { // Buffer APIs template -inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void asum(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::asum(func, queue, n, x, incx, result); } #define ASUM_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1231,13 +1229,13 @@ ASUM_LAUNCHER(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER template 
-inline void scal(Func func, sycl::queue &queue, int64_t n, T1 a, sycl::buffer &x, +inline void scal(Func func, sycl::queue& queue, int64_t n, T1 a, sycl::buffer& x, int64_t incx) { column_major::scal(func, queue, n, a, x, incx); } #define SCAL_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(ROCBLAS_ROUTINE, queue, n, a, x, incx); \ } @@ -1251,14 +1249,14 @@ SCAL_LAUNCHER(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER template -inline void axpy(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { column_major::axpy(func, queue, n, alpha, x, incx, y, incy); } #define AXPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -1269,37 +1267,37 @@ AXPY_LAUNCHER(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { 
throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +inline void rotg(Func func, sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { column_major::rotg(func, queue, a, b, c, s); } #define ROTG_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(ROCBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -1311,14 +1309,14 @@ ROTG_LAUNCHER(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER template -inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +inline void rotm(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { column_major::rotm(func, queue, n, x, incx, y, incy, param); } #define ROTM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer 
&x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -1328,14 +1326,14 @@ ROTM_LAUNCHER(double, rocblas_drotm) #undef ROTM_LAUNCHER template -inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void copy(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { column_major::copy(func, queue, n, x, incx, y, incy); } #define COPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1347,14 +1345,14 @@ COPY_LAUNCHER(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER template -inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +inline void dot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { column_major::dot(func, queue, n, x, incx, y, incy, result); } #define DOT_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } @@ -1367,20 +1365,20 @@ DOT_LAUNCHER(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER -void dot(sycl::queue &queue, 
int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &y, int64_t incy, T2 c, T3 s) { +inline void rot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { column_major::rot(func, queue, n, x, incx, y, incy, c, s); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -1391,20 +1389,20 @@ ROT_LAUNCHER(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { column_major::sdsdot(queue, n, sb, x, incx, y, incy, result); } template -inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(Func func, sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, T y1, sycl::buffer& param) { column_major::rotmg(func, queue, d1, d2, x1, y1, param); } #define ROTMG_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void 
rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -1414,14 +1412,14 @@ ROTMG_LAUNCHER(double, rocblas_drotmg) #undef ROTMG_LAUNCHER template -inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamax(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::iamax(func, queue, n, x, incx, result); } #define IAMAX_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1433,14 +1431,14 @@ IAMAX_LAUNCHER(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER template -inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void swap(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { column_major::swap(func, queue, n, x, incx, y, incy); } #define SWAP_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1452,14 +1450,14 @@ SWAP_LAUNCHER(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER template -inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamin(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { 
column_major::iamin(func, queue, n, x, incx, result); } #define IAMIN_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1471,14 +1469,14 @@ IAMIN_LAUNCHER(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER template -inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void nrm2(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::nrm2(func, queue, n, x, incx, result); } #define NRM2_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1492,14 +1490,14 @@ NRM2_LAUNCHER(std::complex, double, rocblas_dznrm2) // USM APIs template -inline sycl::event asum(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event asum(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { return column_major::asum(func, queue, n, x, incx, result, dependencies); } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } 
@@ -1511,14 +1509,14 @@ ASUM_LAUNCHER_USM(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event scal(Func func, sycl::queue& queue, int64_t n, T1 a, T2* x, int64_t incx, + const std::vector& dependencies) { return column_major::scal(func, queue, n, a, x, incx, dependencies); } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(ROCBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } @@ -1532,14 +1530,14 @@ SCAL_LAUNCHER_USM(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, int64_t incx, - T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event axpy(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, int64_t incx, + T* y, int64_t incy, const std::vector& dependencies) { return column_major::axpy(func, queue, n, alpha, x, incx, y, incy, dependencies); } #define AXPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, dependencies); \ } @@ -1550,38 +1548,38 @@ AXPY_LAUNCHER_USM(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, 
int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 *s, - const std::vector &dependencies) { +inline sycl::event rotg(Func func, sycl::queue& queue, T1* a, T1* b, T2* c, T1* s, + const std::vector& dependencies) { return column_major::rotg(func, queue, a, b, c, s, dependencies); } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - 
sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(ROCBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -1593,14 +1591,14 @@ ROTG_LAUNCHER_USM(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, T *param, const std::vector &dependencies) { +inline sycl::event rotm(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, T* param, const std::vector& dependencies) { return column_major::rotm(func, queue, n, x, incx, y, incy, param, dependencies); } #define ROTM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param, dependencies); \ } @@ -1610,14 +1608,14 @@ ROTM_LAUNCHER_USM(double, rocblas_drotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event copy(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { return column_major::copy(func, queue, n, x, incx, y, incy, dependencies); } #define COPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* 
y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1629,16 +1627,16 @@ COPY_LAUNCHER_USM(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + const T* y, int64_t incy, T* result, + const std::vector& dependencies) { return column_major::dot(func, queue, n, x, incx, y, incy, result, dependencies); } #define DOT_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result, dependencies); \ } @@ -1651,21 +1649,21 @@ DOT_LAUNCHER_USM(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER_USM -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 *x, const int64_t incx, T1 *y, - int64_t incy, T2 c, T3 s, const std::vector &dependencies) { +inline sycl::event rot(Func func, sycl::queue& queue, int64_t n, T1* x, const int64_t incx, T1* y, + int64_t incy, T2 c, T3 s, const std::vector& dependencies) { return 
column_major::rot(func, queue, n, x, incx, y, incy, c, s, dependencies); } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, dependencies); \ } @@ -1676,21 +1674,21 @@ ROT_LAUNCHER_USM(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { return column_major::sdsdot(queue, n, sb, x, incx, y, incy, result); } template -inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y1, T *param, - const std::vector &dependencies) { +inline sycl::event rotmg(Func func, sycl::queue& queue, T* d1, T* d2, T* x1, T y1, T* param, + const std::vector& dependencies) { return column_major::rotmg(func, queue, d1, d2, x1, y1, param, dependencies); } #define ROTMG_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1700,14 +1698,14 @@ ROTMG_LAUNCHER_USM(double, rocblas_drotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - 
int64_t *result, const std::vector &dependencies) { +inline sycl::event iamax(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { return column_major::iamax(func, queue, n, x, incx, result, dependencies); } #define IAMAX_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1719,14 +1717,14 @@ IAMAX_LAUNCHER_USM(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event swap(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { return column_major::swap(func, queue, n, x, incx, y, incy, dependencies); } #define SWAP_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1738,14 +1736,14 @@ SWAP_LAUNCHER_USM(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamin(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { return 
column_major::iamin(func, queue, n, x, incx, result, dependencies); } #define IAMIN_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1757,14 +1755,14 @@ IAMIN_LAUNCHER_USM(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event nrm2(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { return column_major::nrm2(func, queue, n, x, incx, result, dependencies); } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_level2.cpp b/src/blas/backends/rocblas/rocblas_level2.cpp index dd62a8558..20c08c5ce 100644 --- a/src/blas/backends/rocblas/rocblas_level2.cpp +++ b/src/blas/backends/rocblas/rocblas_level2.cpp @@ -28,7 +28,7 @@ // Helper Functions template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf, const int64_t len, const int64_t inc) { const auto abs_inc = std::abs(inc); auto acc = buf.template get_access(cgh); @@ -38,7 +38,7 @@ static inline void 
conj_vector(sycl::handler &cgh, sycl::buffer &buf, const i }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, const int64_t inc) { +static inline void conj_vector(sycl::handler& cgh, T* ptr, const int64_t len, const int64_t inc) { const auto abs_inc = std::abs(inc); cgh.parallel_for(sycl::range{ (std::size_t)len }, [=](sycl::id<1> id) { const auto index = id * abs_inc; @@ -47,7 +47,7 @@ static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, co } template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf_a, sycl::buffer &buf_b, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf_a, sycl::buffer& buf_b, const int64_t len, const int64_t inc_a, const int64_t inc_b) { const auto abs_inc_a = std::abs(inc_a); const auto abs_inc_b = std::abs(inc_b); @@ -61,7 +61,7 @@ static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf_a, sycl: }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr_a, T *ptr_b, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, T* ptr_a, T* ptr_b, const int64_t len, const int64_t inc_a, const int64_t inc_b) { const auto abs_inc_a = std::abs(inc_a); const auto abs_inc_b = std::abs(inc_b); @@ -82,34 +82,34 @@ namespace column_major { // Buffer APIs template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template 
get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define GEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -121,34 +121,34 @@ GEMV_LAUNCHER(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template 
get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, kl, ku, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define GBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -160,32 +160,32 @@ GBMV_LAUNCHER(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER template -inline void ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +inline void ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = 
x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, m, n, (rocDataType *)&alpha, x_, incx, y_, - incy, a_, lda); + rocblas_native_func(func, err, handle, m, n, (rocDataType*)&alpha, x_, incx, y_, incy, + a_, lda); }); }); } #define GER_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -200,34 +200,34 @@ GER_LAUNCHER(c, std::complex, rocblas_zgerc) #undef GER_LAUNCHER template -inline void hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define HBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -237,34 +237,34 @@ HBMV_LAUNCHER(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER template -inline void hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { 
+ onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define HEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -274,31 +274,31 @@ HEMV_LAUNCHER(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER template -inline void her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, +inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = 
sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_, lda); + (rocScalarType*)&alpha, x_, incx, a_, lda); }); }); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -309,33 +309,33 @@ HER_LAUNCHER(double, std::complex, rocblas_zher) #undef HER_LAUNCHER template -inline void her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType 
*)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define HER2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -345,34 +345,33 @@ HER2_LAUNCHER(std::complex, rocblas_zher2) #undef HER2_LAUNCHER template -inline void hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); } #define HPMV_LAUNCHER(TYPE, 
ROCBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -382,30 +381,30 @@ HPMV_LAUNCHER(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER template -inline void hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_); + (rocScalarType*)&alpha, x_, incx, a_); }); }); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ 
hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -415,33 +414,33 @@ HPR_LAUNCHER(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER template -inline void hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define HPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -451,34 +450,34 @@ HPR2_LAUNCHER(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(Func func, sycl::queue &queue, uplo 
upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define SBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -488,34 +487,34 @@ SBMV_LAUNCHER(double, rocblas_dsbmv) #undef SBMV_LAUNCHER template -inline void symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t 
n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define SYMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -525,29 +524,29 @@ SYMV_LAUNCHER(double, rocblas_dsymv) #undef SYMV_LAUNCHER template -inline void syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) 
{ +inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_, lda); + (rocDataType*)&alpha, x_, incx, a_, lda); }); }); } #define SYR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -560,33 +559,33 @@ SYR_LAUNCHER(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER template -inline void syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto 
x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define SYR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -599,34 +598,33 @@ SYR2_LAUNCHER(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER template -inline void spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); } #define SPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -636,29 +634,29 @@ SPMV_LAUNCHER(double, rocblas_dspmv) #undef SPMV_LAUNCHER template -inline void spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; 
rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_); + (rocDataType*)&alpha, x_, incx, a_); }); }); } #define SPR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -668,33 +666,33 @@ SPR_LAUNCHER(double, rocblas_dspr) #undef SPR_LAUNCHER template -inline void spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define SPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, 
sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -704,31 +702,31 @@ SPR2_LAUNCHER(double, rocblas_dspr2) #undef SPR2_LAUNCHER template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); } #define TBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ 
tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -741,31 +739,31 @@ TBMV_LAUNCHER(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); } #define TBSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -778,30 +776,30 @@ TBSV_LAUNCHER(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER template -inline void tpmv(Func func, 
sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); } #define TPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -813,30 +811,30 @@ TPMV_LAUNCHER(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { using 
rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); } #define TPSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -848,31 +846,31 @@ TPSV_LAUNCHER(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); } #define TRMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -884,31 +882,31 @@ TRMV_LAUNCHER(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ 
= sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); } #define TRSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -922,24 +920,24 @@ TRSV_LAUNCHER(std::complex, rocblas_ztrsv) // USM APIs template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, 
get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -947,9 +945,9 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -962,25 +960,25 @@ GEMV_LAUNCHER_USM(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = 
reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, kl, ku, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -988,10 +986,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1004,23 +1002,23 @@ GBMV_LAUNCHER_USM(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, const T *x, - int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, const T* x, + int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler 
&sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, m, n, (rocDataType *)&alpha, x_, incx, y_, - incy, a_, lda); + rocblas_native_func(func, err, handle, m, n, (rocDataType*)&alpha, x_, incx, y_, incy, + a_, lda); }); }); @@ -1028,9 +1026,9 @@ inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T al } #define GER_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); \ } @@ -1044,24 +1042,24 @@ GER_LAUNCHER_USM(c, std::complex, rocblas_zgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1069,9 +1067,9 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1082,24 +1080,24 @@ HBMV_LAUNCHER_USM(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); 
- auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1107,9 +1105,9 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -1120,23 +1118,23 @@ HEMV_LAUNCHER_USM(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + int64_t lda, const std::vector& 
dependencies) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_, lda); + (rocScalarType*)&alpha, x_, incx, a_, lda); }); }); @@ -1144,9 +1142,9 @@ inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -1156,23 +1154,23 @@ HER_LAUNCHER_USM(double, std::complex, rocblas_zher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { 
using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1180,9 +1178,9 @@ inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -1193,24 +1191,23 @@ HER2_LAUNCHER_USM(std::complex, rocblas_zher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& 
dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); @@ -1218,9 +1215,9 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -1231,23 +1228,23 @@ HPMV_LAUNCHER_USM(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - const std::vector &dependencies) { +inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* 
a, + const std::vector& dependencies) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_); + (rocScalarType*)&alpha, x_, incx, a_); }); }); @@ -1255,9 +1252,9 @@ inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -1267,23 +1264,23 @@ HPR_LAUNCHER_USM(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { using rocDataType = typename 
RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1291,9 +1288,9 @@ inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -1304,24 +1301,24 @@ HPR2_LAUNCHER_USM(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType 
= typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1329,9 +1326,9 @@ inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1342,24 +1339,24 @@ SBMV_LAUNCHER_USM(double, rocblas_dsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + 
const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1367,9 +1364,9 @@ inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -1380,22 +1377,22 @@ SYMV_LAUNCHER_USM(double, rocblas_dsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event 
syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_, lda); + (rocDataType*)&alpha, x_, incx, a_, lda); }); }); @@ -1403,9 +1400,9 @@ inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -1418,23 +1415,23 @@ SYR_LAUNCHER_USM(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& 
dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1442,9 +1439,9 @@ inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -1458,24 +1455,23 @@ SYR2_LAUNCHER_USM(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const 
std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); @@ -1483,9 +1479,9 @@ inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -1496,21 +1492,21 @@ SPMV_LAUNCHER_USM(double, rocblas_dspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, + const std::vector& dependencies) { using 
rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_); + (rocDataType*)&alpha, x_, incx, a_); }); }); @@ -1518,8 +1514,8 @@ inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -1529,23 +1525,23 @@ SPR_LAUNCHER_USM(double, rocblas_dspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1553,9 +1549,9 @@ inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -1566,23 +1562,23 @@ SPR2_LAUNCHER_USM(double, rocblas_dspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); @@ -1590,9 +1586,9 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -1605,23 +1601,23 @@ TBMV_LAUNCHER_USM(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); @@ -1629,9 +1625,9 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -1644,23 +1640,23 @@ TBSV_LAUNCHER_USM(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); @@ -1668,9 +1664,9 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -1683,23 +1679,23 @@ TPMV_LAUNCHER_USM(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - 
onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); @@ -1707,9 +1703,9 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -1722,23 +1718,23 @@ TPSV_LAUNCHER_USM(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); @@ -1746,9 +1742,9 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } @@ -1761,23 +1757,23 @@ TRMV_LAUNCHER_USM(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); @@ -1785,9 +1781,9 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } @@ -1806,22 +1802,22 @@ namespace row_major { // Buffer APIs template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); beta = std::conj(beta); if (m > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx); }); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -1830,25 +1826,25 @@ inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy); } #define GEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1860,22 +1856,22 @@ GEMV_LAUNCHER(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx, - std::complex beta, sycl::buffer, 1> &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx, + std::complex beta, sycl::buffer, 1>& y, int64_t incy) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); beta = std::conj(beta); if (m > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx); }); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -1884,25 +1880,25 @@ inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y, incy); } #define GBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1914,35 +1910,35 @@ GBMV_LAUNCHER(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER template -inline void gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { +inline void gerc(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda); } template -inline void geru(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { +inline void geru(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { column_major::ger(func, queue, n, m, alpha, y, 
incy, x, incx, a, lda); } template -inline void ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +inline void ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda); } #define GER_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger##EXT(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -1957,29 +1953,29 @@ GER_LAUNCHER(c, std::complex, rocblas_zgeru) #undef GER_LAUNCHER template -inline void hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hbmv(func, queue, new_uplo, n, k, new_alpha, a, lda, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1989,29 +1985,29 @@ HBMV_LAUNCHER(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER template -inline void hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hemv(func, queue, new_uplo, n, new_alpha, a, lda, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2021,22 +2017,22 @@ HEMV_LAUNCHER(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER template -inline void her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, +inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } column_major::her(func, queue, new_uplo, n, alpha, x, incx, a, lda); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2047,23 +2043,23 @@ HER_LAUNCHER(double, std::complex, rocblas_zher) #undef HER_LAUNCHER template -inline void her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::her2(func, queue, new_uplo, n, alpha, y, incy, x, incx, a, lda); } #define HER2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -2073,29 +2069,29 @@ HER2_LAUNCHER(std::complex, rocblas_zher2) #undef HER2_LAUNCHER template -inline void hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hpmv(func, queue, new_uplo, n, new_alpha, a, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -2105,21 +2101,21 @@ HPMV_LAUNCHER(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER template -inline void hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } column_major::hpr(func, queue, new_uplo, n, alpha, x, incx, a); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2129,23 +2125,23 @@ HPR_LAUNCHER(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER template -inline void hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hpr2(func, queue, new_uplo, n, alpha, y, incy, x, incx, a); } #define HPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2155,19 +2151,19 @@ HPR2_LAUNCHER(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::sbmv(func, queue, new_uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } #define SBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2177,19 +2173,19 @@ SBMV_LAUNCHER(double, rocblas_dsbmv) #undef SBMV_LAUNCHER template -inline void symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::symv(func, queue, new_uplo, n, alpha, a, lda, x, incx, beta, y, incy); } #define SYMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2199,17 +2195,17 @@ SYMV_LAUNCHER(double, rocblas_dsymv) #undef SYMV_LAUNCHER template -inline void syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::syr(func, queue, new_uplo, n, alpha, x, incx, a, lda); } #define SYR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2222,19 +2218,19 @@ SYR_LAUNCHER(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER template -inline void syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::syr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, lda); } #define SYR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -2247,19 +2243,19 @@ SYR2_LAUNCHER(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER template -inline void spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::spmv(func, queue, new_uplo, n, alpha, a, x, incx, beta, y, incy); } #define SPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -2269,17 +2265,17 @@ SPMV_LAUNCHER(double, rocblas_dspmv) #undef SPMV_LAUNCHER template -inline void spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::spr(func, queue, new_uplo, n, alpha, x, incx, a); } #define SPR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2289,19 +2285,19 @@ SPR_LAUNCHER(double, rocblas_dspr) #undef SPR_LAUNCHER template -inline void spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::spr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a); } #define SPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2311,17 +2307,17 @@ SPR2_LAUNCHER(double, rocblas_dspr2) #undef SPR2_LAUNCHER template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2329,26 +2325,26 @@ inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx); } #define TBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -2361,17 +2357,17 @@ TBMV_LAUNCHER(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2379,26 +2375,26 @@ inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx); } #define TBSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -2411,17 +2407,17 @@ TBSV_LAUNCHER(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER template -inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2429,25 +2425,25 @@ inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx); } #define TPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -2459,17 +2455,17 @@ TPMV_LAUNCHER(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2477,25 +2473,25 @@ inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx); } #define TPSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -2507,17 +2503,17 @@ TPSV_LAUNCHER(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2525,26 +2521,26 @@ inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx); } #define TRMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -2556,17 +2552,17 @@ TRMV_LAUNCHER(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2574,26 +2570,26 @@ inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx); } #define TRSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -2607,15 +2603,15 @@ TRSV_LAUNCHER(std::complex, rocblas_ztrsv) // USM APIs template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); @@ -2623,10 +2619,10 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t if (m > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)x, m, incx); }); + [&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)x, m, incx); }); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -2638,7 +2634,7 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2649,20 +2645,20 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t } template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::gemv(func, queue, new_trans, n, m, alpha, a, lda, x, incx, beta, y, incy, dependencies); } #define GEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -2675,15 +2671,15 @@ GEMV_LAUNCHER_USM(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { alpha = std::conj(alpha); @@ -2691,10 +2687,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t if (m > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)x, m, incx); }); + [&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)x, m, incx); }); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -2706,7 +2702,7 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2717,22 +2713,22 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t } template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::gbmv(func, queue, new_trans, n, m, ku, kl, alpha, a, lda, x, incx, beta, y, incy, dependencies); } #define GBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -2745,12 +2741,12 @@ GBMV_LAUNCHER_USM(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event gerc(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)y, n, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)y, n, incy); }) .wait_and_throw(); } @@ -2758,24 +2754,24 @@ inline sycl::event gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std } template -inline sycl::event geru(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t 
incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event geru(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { return column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda, dependencies); } template -inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, const T *x, - int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, const T* x, + int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { return column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda, dependencies); } #define GER_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger##EXT(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2790,18 +2786,18 @@ GER_LAUNCHER_USM(c, std::complex, rocblas_zgeru) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& 
dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2809,7 +2805,7 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t incy, dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2819,9 +2815,9 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -2832,18 +2828,18 @@ HBMV_LAUNCHER_USM(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) 
{ sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2851,7 +2847,7 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t incy, dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2861,9 +2857,9 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -2874,14 +2870,14 @@ HEMV_LAUNCHER_USM(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + int64_t lda, const std::vector& dependencies) { auto new_uplo = upper_lower == 
oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (DataType *)x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); }) .wait_and_throw(); } @@ -2889,9 +2885,9 @@ inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -2901,14 +2897,14 @@ HER_LAUNCHER_USM(double, std::complex, rocblas_zher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, (T *)y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); }) .wait_and_throw(); } @@ -2917,9 +2913,9 @@ inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2930,18 +2926,18 @@ HER2_LAUNCHER_USM(std::complex, rocblas_zher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2949,7 +2945,7 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2959,9 +2955,9 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -2972,14 +2968,14 @@ HPMV_LAUNCHER_USM(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - const std::vector &dependencies) { +inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (DataType *)x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); }) .wait_and_throw(); } @@ -2987,9 +2983,9 @@ inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -2999,14 +2995,14 @@ HPR_LAUNCHER_USM(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, (T *)y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); }) .wait_and_throw(); } @@ -3014,9 +3010,9 @@ inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -3027,20 +3023,20 @@ HPR2_LAUNCHER_USM(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::sbmv(func, queue, new_uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } #define SBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -3051,20 +3047,20 @@ SBMV_LAUNCHER_USM(double, rocblas_dsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::symv(func, queue, new_uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } #define SYMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -3075,19 +3071,19 @@ SYMV_LAUNCHER_USM(double, rocblas_dsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::syr(func, queue, new_uplo, n, alpha, x, incx, a, lda, dependencies); } #define SYR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -3100,20 +3096,20 @@ SYR_LAUNCHER_USM(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::syr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, lda, dependencies); } #define SYR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -3127,20 +3123,20 @@ SYR2_LAUNCHER_USM(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::spmv(func, queue, new_uplo, n, alpha, a, x, incx, beta, y, incy, dependencies); } #define SPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -3151,18 +3147,18 @@ SPMV_LAUNCHER_USM(double, rocblas_dspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::spr(func, queue, new_uplo, n, alpha, x, incx, a, dependencies); } #define SPR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -3172,19 +3168,19 @@ SPR_LAUNCHER_USM(double, rocblas_dspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::spr2(func, queue, new_uplo, n, alpha, x, incx, y, incy, a, dependencies); } #define SPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -3195,20 +3191,20 @@ SPR2_LAUNCHER_USM(double, rocblas_dspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3218,7 +3214,7 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3229,22 +3225,22 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::tbmv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx, dependencies); } #define TBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -3257,20 +3253,20 @@ TBMV_LAUNCHER_USM(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3280,7 +3276,7 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3291,22 +3287,22 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::tbsv(func, queue, new_uplo, new_trans, unit_diag, n, k, a, lda, x, incx, dependencies); } #define TBSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -3319,19 +3315,19 @@ TBSV_LAUNCHER_USM(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3343,7 +3339,7 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (n > 0) { incx = std::abs(incx); - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3354,22 +3350,22 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::tpmv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx, dependencies); } #define TPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -3382,19 +3378,19 @@ TPMV_LAUNCHER_USM(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3406,7 +3402,7 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (n > 0) { incx = std::abs(incx); - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3417,22 +3413,22 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::tpsv(func, queue, new_uplo, new_trans, unit_diag, n, a, x, incx, dependencies); } #define TPSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -3445,20 +3441,20 @@ TPSV_LAUNCHER_USM(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3468,7 +3464,7 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3479,22 +3475,22 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::trmv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx, dependencies); } #define TRMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } @@ -3507,20 +3503,20 @@ TRMV_LAUNCHER_USM(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3530,7 +3526,7 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::math::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3541,22 +3537,22 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::trsv(func, queue, new_uplo, new_trans, unit_diag, n, a, lda, x, incx, dependencies); } #define TRSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_level3.cpp b/src/blas/backends/rocblas/rocblas_level3.cpp index 904d83114..1ec25bb29 100644 --- a/src/blas/backends/rocblas/rocblas_level3.cpp +++ b/src/blas/backends/rocblas/rocblas_level3.cpp @@ -34,34 +34,34 @@ namespace column_major { // Buffer APIs template -inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { 
auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(transb), m, n, k, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define GEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ @@ -77,9 +77,9 @@ GEMM_LAUNCHER(std::complex, rocblas_zgemm) template inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, COMPUTETYPE CT, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, T_S alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T_S beta, sycl::buffer &c, + sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, T_S alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T_S beta, sycl::buffer& c, int64_t ldc) { using rocDataType_A = typename RocEquivalentType::Type; using rocDataType_B = typename RocEquivalentType::Type; @@ -87,30 +87,30 @@ inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C using rocDataType_S = typename RocEquivalentType::Type; overflow_check(m, n, k, 
lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType_S *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (rocDataType_S *)&beta, c_, DT_C, - ldc, c_, DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); + get_rocblas_operation(transb), m, n, k, (rocDataType_S*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (rocDataType_S*)&beta, c_, DT_C, ldc, c_, + DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); }); }); } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_S beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_S beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE, \ queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -130,34 +130,34 @@ GEMM_EX_LAUNCHER(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocblas_d #undef GEMM_EX_LAUNCHER template -inline void symm(Func 
func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define SYMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, 
ldc); \ } @@ -170,34 +170,34 @@ SYMM_LAUNCHER(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER template -inline void hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define HEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, 
sycl::buffer& c, int64_t ldc) { \ hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -208,32 +208,32 @@ HEMM_LAUNCHER(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER template -inline void syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, T beta, - sycl::buffer &c, int64_t ldc) { +inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, T beta, + sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + (rocDataType*)&beta, c_, ldc); }); }); } #define SYRK_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ 
-245,33 +245,33 @@ SYRK_LAUNCHER(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER template -inline void herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, ScalarType alpha, sycl::buffer &a, int64_t lda, - ScalarType beta, sycl::buffer &c, int64_t ldc) { +inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, ScalarType alpha, sycl::buffer& a, int64_t lda, + ScalarType beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocScalarType *)&alpha, a_, - lda, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocScalarType*)&alpha, a_, lda, + (rocScalarType*)&beta, c_, ldc); }); }); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } 
@@ -281,34 +281,34 @@ HERK_LAUNCHER(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER template -inline void syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define SYR2K_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, 
sycl::buffer& c, int64_t ldc) { \ syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -321,37 +321,37 @@ SYR2K_LAUNCHER(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, DataType alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, ScalarType beta, - sycl::buffer &c, int64_t ldc) { +inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, DataType alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, ScalarType beta, + sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocScalarType*)&beta, c_, ldc); }); }); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer 
&b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -366,26 +366,26 @@ HER2K_LAUNCHER(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; #if ROCBLAS_VERSION_MAJOR >= 4 rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, 
(rocDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); #else rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), get_rocblas_fill_mode(upper_lower), @@ -397,9 +397,9 @@ inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define TRMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -412,33 +412,33 @@ TRMM_LAUNCHER(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER template -inline void trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, 
get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb); }); }); } #define TRSM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -453,25 +453,25 @@ TRSM_LAUNCHER(std::complex, rocblas_ztrsm) // USM APIs template -inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); 
- auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(transb), m, n, k, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -479,10 +479,10 @@ inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpo } #define GEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, \ c, ldc, dependencies); \ } @@ -497,29 +497,29 @@ GEMM_LAUNCHER_USM(std::complex, rocblas_zgemm) template inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - COMPUTETYPE CT, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T_S alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_S beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + COMPUTETYPE CT, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T_S alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_S beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType_A = typename RocEquivalentType::Type; using rocDataType_B = typename 
RocEquivalentType::Type; using rocDataType_C = typename RocEquivalentType::Type; using rocDataType_S = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType_S *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (rocDataType_S *)&beta, c_, DT_C, - ldc, c_, DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); + get_rocblas_operation(transb), m, n, k, (rocDataType_S*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (rocDataType_S*)&beta, c_, DT_C, ldc, c_, + DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); }); }); @@ -528,10 +528,10 @@ inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_S beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_S beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return 
gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, \ ROCMCOMPUTETYPE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ @@ -551,24 +551,24 @@ GEMM_EX_LAUNCHER_USM(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocbl #undef GEMM_EX_LAUNCHER_USM template -inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -576,10 +576,10 @@ inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upp } #define SYMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, 
int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -592,24 +592,24 @@ SYMM_LAUNCHER_USM(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, 
(rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -617,10 +617,10 @@ inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upp } #define HEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -631,23 +631,23 @@ HEMM_LAUNCHER_USM(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, const T *a, int64_t lda, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, const T* a, int64_t lda, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, 
get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + (rocDataType*)&beta, c_, ldc); }); }); @@ -655,9 +655,9 @@ inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define SYRK_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -670,25 +670,25 @@ SYRK_LAUNCHER_USM(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, const ScalarType alpha, const DataType *a, int64_t lda, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, const ScalarType alpha, const DataType* a, int64_t lda, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) 
{ auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocScalarType *)&alpha, a_, - lda, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocScalarType*)&alpha, a_, lda, + (rocScalarType*)&beta, c_, ldc); }); }); @@ -696,10 +696,10 @@ inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -710,25 +710,25 @@ HERK_LAUNCHER_USM(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *b, - int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* b, + int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = 
queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -736,10 +736,10 @@ inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define SYR2K_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -752,26 +752,26 @@ SYR2K_LAUNCHER_USM(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const DataType alpha, const DataType *a, int64_t lda, - const DataType *b, int64_t ldb, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector 
&dependencies) { +inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const DataType alpha, const DataType* a, int64_t lda, + const DataType* b, int64_t ldb, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocScalarType*)&beta, c_, ldc); }); }); @@ -779,10 +779,10 @@ inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, 
DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -797,26 +797,26 @@ HER2K_LAUNCHER_USM(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; #if ROCBLAS_VERSION_MAJOR >= 4 rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); #else rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), get_rocblas_fill_mode(upper_lower), @@ -830,9 +830,9 @@ 
inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upp } #define TRMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -845,25 +845,25 @@ TRMM_LAUNCHER_USM(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemath_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - 
get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb); }); }); @@ -871,9 +871,9 @@ inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upp } #define TRSM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -892,9 +892,9 @@ namespace row_major { // Buffer APIs template -inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_transa = transb; auto new_transb = transa; @@ -903,9 +903,9 @@ inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose tran } #define GEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& 
queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ @@ -921,9 +921,9 @@ GEMM_LAUNCHER(std::complex, rocblas_zgemm) template inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, COMPUTETYPE CT, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, T_S alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T_S beta, sycl::buffer &c, + sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, T_S alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T_S beta, sycl::buffer& c, int64_t ldc) { auto new_transa = transb; auto new_transb = transa; @@ -934,9 +934,9 @@ inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_S beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_S beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE, \ queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -956,21 +956,21 @@ GEMM_EX_LAUNCHER(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocblas_d #undef GEMM_EX_LAUNCHER template -inline void symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, 
int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::symm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc); } #define SYMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -983,21 +983,21 @@ SYMM_LAUNCHER(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER template -inline void hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { - auto new_side = - left_right == oneapi::math::side::left ? 
oneapi::math::side::right : oneapi::math::side::left; +inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::hemm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc); } #define HEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -1008,21 +1008,21 @@ HEMM_LAUNCHER(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER template -inline void syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, T beta, - sycl::buffer &c, int64_t ldc) { +inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, T beta, + sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::syrk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc); } #define SYRK_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -1034,21 +1034,21 @@ SYRK_LAUNCHER(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER template -inline void herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, ScalarType alpha, sycl::buffer &a, int64_t lda, - ScalarType beta, sycl::buffer &c, int64_t ldc) { +inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, ScalarType alpha, sycl::buffer& a, int64_t lda, + ScalarType beta, sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::conjtrans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::herk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -1058,22 +1058,22 @@ HERK_LAUNCHER(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER template -inline void syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; column_major::syr2k(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } #define SYR2K_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -1086,14 +1086,14 @@ SYR2K_LAUNCHER(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, DataType alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, ScalarType beta, - sycl::buffer &c, int64_t ldc) { +inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, DataType alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, ScalarType beta, + sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::conjtrans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; auto new_alpha = std::conj(alpha); column_major::her2k(func, queue, new_uplo, new_trans, n, k, new_alpha, a, lda, b, ldb, beta, c, @@ -1101,10 +1101,10 @@ inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose tra } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -1119,22 +1119,22 @@ HER2K_LAUNCHER(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::trmm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b, ldb); } #define TRMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -1147,22 +1147,22 @@ TRMM_LAUNCHER(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER template -inline void trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; column_major::trsm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b, ldb); } #define TRSM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -1177,10 +1177,10 @@ TRSM_LAUNCHER(std::complex, rocblas_ztrsm) // USM APIs template -inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -1189,10 +1189,10 @@ inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpo } #define GEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t 
ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, \ c, ldc, dependencies); \ } @@ -1207,10 +1207,10 @@ GEMM_LAUNCHER_USM(std::complex, rocblas_zgemm) template inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - COMPUTETYPE CT, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T_S alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_S beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + COMPUTETYPE CT, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T_S alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_S beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -1220,10 +1220,10 @@ inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_S beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_S beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, \ ROCMCOMPUTETYPE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ @@ -1243,23 +1243,23 @@ GEMM_EX_LAUNCHER_USM(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocbl #undef 
GEMM_EX_LAUNCHER_USM template -inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::symm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } #define SYMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1272,23 +1272,23 @@ SYMM_LAUNCHER_USM(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector 
&dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::hemm(func, queue, new_side, new_uplo, n, m, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } #define HEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1299,22 +1299,22 @@ HEMM_LAUNCHER_USM(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, const T *a, int64_t lda, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, const T* a, int64_t lda, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == 
oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::syrk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } #define SYRK_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -1327,24 +1327,24 @@ SYRK_LAUNCHER_USM(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, const ScalarType alpha, const DataType *a, int64_t lda, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, const ScalarType alpha, const DataType* a, int64_t lda, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::conjtrans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::herk(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -1355,24 +1355,24 @@ HERK_LAUNCHER_USM(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *b, - int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* b, + int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::trans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; return column_major::syr2k(func, queue, new_uplo, new_trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } #define SYR2K_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1385,14 +1385,14 @@ SYR2K_LAUNCHER_USM(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const DataType alpha, const DataType *a, int64_t lda, - const DataType *b, int64_t ldb, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { +inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const DataType alpha, const DataType* a, int64_t lda, + const DataType* b, int64_t ldb, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; auto new_trans = trans == oneapi::math::transpose::nontrans ? 
oneapi::math::transpose::conjtrans - : oneapi::math::transpose::nontrans; + : oneapi::math::transpose::nontrans; auto new_alpha = std::conj(alpha); return column_major::her2k(func, queue, new_uplo, new_trans, n, k, new_alpha, a, lda, b, ldb, @@ -1400,10 +1400,10 @@ inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1418,23 +1418,23 @@ HER2K_LAUNCHER_USM(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? 
oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::trmm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b, ldb, dependencies); } #define TRMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -1447,23 +1447,23 @@ TRMM_LAUNCHER_USM(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { - auto new_side = - left_right == oneapi::math::side::left ? oneapi::math::side::right : oneapi::math::side::left; +inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { + auto new_side = left_right == oneapi::math::side::left ? oneapi::math::side::right + : oneapi::math::side::left; auto new_uplo = upper_lower == oneapi::math::uplo::lower ? 
oneapi::math::uplo::upper - : oneapi::math::uplo::lower; + : oneapi::math::uplo::lower; return column_major::trsm(func, queue, new_side, new_uplo, trans, unit_diag, n, m, alpha, a, lda, b, ldb, dependencies); } #define TRSM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.cpp b/src/blas/backends/rocblas/rocblas_scope_handle.cpp index 65187223e..8b59e3f99 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle.cpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle.cpp @@ -26,7 +26,7 @@ namespace rocblas { template rocblas_handle_container::~rocblas_handle_container() noexcept(false) { - for (auto &handle_pair : rocblas_handle_container_mapper_) { + for (auto& handle_pair : rocblas_handle_container_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); @@ -59,7 +59,7 @@ thread_local rocblas_handle_container RocblasScopedContextHandler::h #endif RocblasScopedContextHandler::RocblasScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : interop_h(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -89,8 +89,8 @@ RocblasScopedContextHandler::~RocblasScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast 
*>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -108,7 +108,7 @@ void ContextCallback(void *userData) { } } -rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue& queue) { auto hipDevice = interop_h.get_native_device(); hipError_t hipErr; hipCtx_t desired; @@ -154,10 +154,10 @@ rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue &queue) { +hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context RocblasScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context RocblasScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.hpp b/src/blas/backends/rocblas/rocblas_scope_handle.hpp index c08db4a78..f5dbd6f23 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle.hpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle.hpp @@ -45,29 +45,29 @@ namespace rocblas { template struct rocblas_handle_container { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_container_mapper_{}; ~rocblas_handle_container() noexcept(false); }; class RocblasScopedContextHandler { HIPcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &interop_h; + sycl::interop_handle& interop_h; #ifdef ONEMATH_PI_INTERFACE_REMOVED static thread_local rocblas_handle_container handle_helper; #else static thread_local rocblas_handle_container handle_helper; #endif - sycl::context get_context(const sycl::queue &queue); - hipStream_t get_stream(const sycl::queue &queue); + 
sycl::context get_context(const sycl::queue& queue); + hipStream_t get_stream(const sycl::queue& queue); public: - RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~RocblasScopedContextHandler() noexcept(false); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp index bc23cf26a..5ac12ca0e 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp @@ -27,7 +27,7 @@ namespace blas { namespace rocblas { rocblas_handle_container::~rocblas_handle_container() noexcept(false) { - for (auto &handle_pair : rocblas_handle_mapper_) { + for (auto& handle_pair : rocblas_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); @@ -46,10 +46,10 @@ thread_local rocblas_handle_container RocblasScopedContextHandler::handle_helper rocblas_handle_container{}; RocblasScopedContextHandler::RocblasScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : interop_h(ih) {} -rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue& queue) { sycl::device device = queue.get_device(); int current_device = interop_h.get_native_device(); hipStream_t streamId = get_stream(queue); @@ -84,7 +84,7 @@ rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue &queue) { 
+hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue& queue) { return interop_h.get_native_queue(); } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp index d6fe2a7e7..48dfd433b 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp @@ -35,7 +35,7 @@ namespace blas { namespace rocblas { struct rocblas_handle_container { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_mapper_{}; ~rocblas_handle_container() noexcept(false); }; @@ -43,13 +43,13 @@ struct rocblas_handle_container { class RocblasScopedContextHandler { sycl::interop_handle interop_h; static thread_local rocblas_handle_container handle_helper; - sycl::context get_context(const sycl::queue &queue); - hipStream_t get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue& queue); + hipStream_t get_stream(const sycl::queue& queue); public: - RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
diff --git a/src/blas/backends/rocblas/rocblas_task.hpp b/src/blas/backends/rocblas/rocblas_task.hpp index bf1197897..ae48720c9 100644 --- a/src/blas/backends/rocblas/rocblas_task.hpp +++ b/src/blas/backends/rocblas/rocblas_task.hpp @@ -53,7 +53,7 @@ namespace rocblas { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { cgh.hipSYCL_enqueue_custom_operation([f, queue](sycl::interop_handle ih) { auto sc = RocblasScopedContextHandler(queue, ih); f(sc); @@ -61,9 +61,9 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #else template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -73,7 +73,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #endif template -static inline void onemath_rocblas_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemath_rocblas_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/blas/blas_loader.cpp b/src/blas/blas_loader.cpp index d92e188c6..33c3ca185 100644 --- a/src/blas/blas_loader.cpp +++ b/src/blas/blas_loader.cpp @@ -32,3962 +32,3971 @@ static oneapi::math::detail::table_initializer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_scasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dzasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_sasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dasum_sycl(queue, n, x, incx, result); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, 
- sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void 
axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue 
}].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { function_tables[{ libkey, queue }].column_major_scopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dcopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, 
std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ccopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zcopy_sycl(queue, n, x, incx, y, incy); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_scopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, 
std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t 
incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::math::device 
libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_isamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_idamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_icamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_izamin_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t 
incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_isamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_idamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_icamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_izamax_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_scnrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dznrm2_sycl(queue, n, x, incx, 
result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_snrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dnrm2_sycl(queue, n, x, incx, result); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { 
function_tables[{ libkey, queue }].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].column_major_srotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].column_major_drotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].column_major_crotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].column_major_zrotg_sycl(queue, a, b, c, s); } -void 
rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); } -void rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); } -void rotmg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } -void rotmg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_sscal_sycl(queue, n, 
alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_cscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_csscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_zscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_zdscal_sycl(queue, n, alpha, x, incx); } -void sdsdot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer 
&result) { +void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zswap_sycl(queue, n, x, incx, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - 
std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t 
m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - 
std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, 
sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, 
sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void 
dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { 
function_tables[{ libkey, queue }].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, 
std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::math::device libkey, 
sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> 
&x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr2(oneapi::math::device libkey, 
sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t 
n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void 
spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, 
sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void syr2(oneapi::math::device libkey, sycl::queue 
&queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, 
std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo 
upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, 
n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue 
&queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, 
x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue 
&queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, 
ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } 
-void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(oneapi::math::device libkey, sycl::queue &queue, uplo 
upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cher2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zher2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, 
std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + 
sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose 
trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - 
sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_strmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dtrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ctrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); 
} -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ztrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_strsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dtrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, 
- sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ctrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ztrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, 
std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_sycl( 
queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, 
side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, 
stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_sgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { 
function_tables[{ libkey, queue }].column_major_dgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, 
std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device 
libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_sycl( 
queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_sycl( 
queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device 
libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t 
lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t 
m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, 
- std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t 
n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + 
std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_somatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_domatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_comatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, 
std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zomatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dzasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].column_major_dasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event 
axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, 
std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, 
dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - 
const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const 
std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* 
x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { 
+sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsdot_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdotc_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } 
-sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdotc_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdotu_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdotu_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, 
std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_isamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_idamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_icamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_izamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* 
result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_isamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_idamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_icamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_izamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ 
libkey, queue }].column_major_scnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dznrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_snrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, 
s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device 
libkey, sycl::queue &queue, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotm_usm_sycl( queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drotm_usm_sycl( queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event sdsdot(oneapi::math::device libkey, sycl::queue &queue, 
std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { +sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdsdot_usm_sycl( queue, n, sb, x, incx, y, incy, result, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device 
libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, 
x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + 
std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t 
lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, 
std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t 
stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_batch_group_usm_sycl( queue, 
trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, 
group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, 
std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + 
std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, 
const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::math::device libkey, sycl::queue& 
queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - 
std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher2_usm_sycl( queue, upper_lower, n, alpha, 
x, incx, y, incy, a, lda, dependencies); } -sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhpr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event hpr2(oneapi::math::device libkey, 
sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsbmv_usm_sycl( queue, upper_lower, n, 
k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, 
std::int64_t incx, double *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dspr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, 
const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_dsyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_stbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_ztbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_ctbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event 
tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) 
{ +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + 
std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_ztrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, 
lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* 
b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event 
gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device 
libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const bfloat16* a, std::int64_t lda, const bfloat16* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } 
-sycl::event herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, 
std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const 
double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - 
const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, 
const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, 
dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, 
sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } 
-sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, 
queue }].column_major_dtrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t 
ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex 
alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, 
batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t 
group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& 
queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, 
std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* 
n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& 
queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, 
sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const 
std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, std::int64_t 
lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector 
&dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, const std::vector &dependencies) { + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, 
float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, 
std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - 
const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, 
std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, 
transpose trans, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { 
return function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, 
+sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, 
float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, const std::vector &dependencies) { +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, 
const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, 
std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_simatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& 
queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const 
std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); 
} -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& 
queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - 
std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -4001,3962 +4010,3971 @@ static oneapi::math::detail::table_initializer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_scasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dzasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& 
x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dasum_sycl(queue, n, x, incx, result); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, 
std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t 
stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& 
y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { function_tables[{ libkey, queue }].row_major_scopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dcopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ccopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zcopy_sycl(queue, n, x, incx, y, incy); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + 
sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_scopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } 
-void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { 
+void dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_isamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_idamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device 
libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_icamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_izamin_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_isamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_idamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_icamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void 
iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_izamax_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_scnrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dznrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_snrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dnrm2_sycl(queue, n, x, incx, result); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].row_major_srotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::math::device libkey, 
sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].row_major_drotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].row_major_crotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].row_major_zrotg_sycl(queue, a, b, c, s); } -void rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); } -void rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); } -void rotmg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(oneapi::math::device libkey, sycl::queue& queue, 
sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } -void rotmg(oneapi::math::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(oneapi::math::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_sscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_cscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_csscal_sycl(queue, n, alpha, x, incx); } -void 
scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_zscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_zdscal_sycl(queue, n, alpha, x, incx); } -void sdsdot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { +void sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zswap_sycl(queue, n, x, incx, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue 
}].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sgemv_sycl(queue, trans, m, n, alpha, 
a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void 
gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 
1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, 
incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t 
n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cgerc_sycl(queue, 
m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t 
incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex 
beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, 
lda); } -void her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void 
hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, 
std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, 
incx, y, incy, a); } -void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { 
function_tables[{ libkey, queue }].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { 
+void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stbsv_sycl(queue, 
upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo 
upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, 
sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, 
std::int64_t incx) { function_tables[{ libkey, queue }].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, 
sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t 
m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, 
sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, 
transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void herk(oneapi::math::device libkey, 
sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +void herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t 
k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> 
&a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t 
k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_sycl( queue, upper_lower, 
trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, 
std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_strmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, 
uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ztrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_strsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& 
a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + 
sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, 
stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, 
float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, 
std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, 
transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_sgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void 
gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, 
b, ldb, beta, c, ldc); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, 
sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, 
std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose 
trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, - std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& ab, + std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(oneapi::math::device libkey, 
sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, 
batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, 
std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void 
omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, 
sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, 
ldb); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, 
c, ldc); } -void omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dzasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ 
libkey, queue }].row_major_sasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const 
std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const 
std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].row_major_caxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& 
queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { 
return function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { 
+sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ 
libkey, queue }].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_isamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_idamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_icamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_izamin_usm_sycl(queue, n, x, incx, result, 
dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_isamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_idamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_icamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_izamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue 
&queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_snrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { 
+sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector 
&dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::math::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::math::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotm(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event rotmg(oneapi::math::device libkey, sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(oneapi::math::device libkey, sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t 
n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, 
double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event sdsdot(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { +sycl::event sdsdot(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, result, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + 
std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* 
a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, 
std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemv_usm_sycl( queue, trans, m, n, alpha, 
a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::math::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double 
beta, double *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, 
+ const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, 
std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, 
dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - 
std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, 
std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event ger(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const 
std::vector &dependencies) { +sycl::event ger(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, 
std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hbmv(oneapi::math::device 
libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, 
std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].row_major_cher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event her2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, 
+ const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpr2_usm_sycl( queue, upper_lower, n, alpha, 
x, incx, y, incy, a, dependencies); } -sycl::event hpr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event sbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].row_major_dsbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr(oneapi::math::device libkey, sycl::queue 
&queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event spr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::math::device libkey, 
sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, 
std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event syr2(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].row_major_stbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].row_major_ztbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) 
{ return function_tables[{ libkey, queue }].row_major_ctbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event 
tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { 
+sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + 
std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, 
queue }].row_major_ztrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } 
-sycl::event trsv(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, + 
double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, 
transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::math::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const bfloat16* a, std::int64_t lda, const bfloat16* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::math::device libkey, sycl::queue &queue, 
uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].row_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector 
&dependencies) { +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, 
ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_usm_sycl( queue, upper_lower, trans, n, 
k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, 
+ transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event 
syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, 
const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, 
std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syr2k(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } 
-sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_usm_sycl( queue, left_right, upper_lower, 
trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::math::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, 
queue }].row_major_ztrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device 
libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* 
upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, 
std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::math::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, 
std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* 
ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* 
ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + 
float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, 
std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, const std::vector &dependencies) { + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, 
dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::math::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::math::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex 
alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const 
std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, 
std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, 
- std::int64_t m, std::int64_t n, double alpha, double *ab, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& 
queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, 
std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, const std::vector &dependencies) { +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, 
std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event 
omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ 
libkey, queue }].row_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_simatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event 
imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::math::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::math::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, 
- std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::math::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::math::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { 
+sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, 
ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::math::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* 
alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } diff --git a/src/blas/function_table.hpp b/src/blas/function_table.hpp index 71321bdc7..cd07f9dca 100644 --- a/src/blas/function_table.hpp +++ b/src/blas/function_table.hpp @@ -34,4940 +34,4942 @@ typedef struct { // Buffer APIs - void (*column_major_scasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_dzasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_sasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_dasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_saxpy_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*column_major_daxpy_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*column_major_caxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zaxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_saxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_scasum_sycl)(sycl::queue& queue, 
std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_dzasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_sasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_dasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_saxpy_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*column_major_daxpy_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*column_major_caxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zaxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_saxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_daxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_daxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_caxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*column_major_caxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex 
alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_zaxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*column_major_zaxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_saxpby_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_daxpby_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_caxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + void (*column_major_saxpby_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_daxpby_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_caxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zaxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zaxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void 
(*column_major_scopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_scopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_dcopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_dcopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_ccopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zcopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_scopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_ccopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zcopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_scopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_dcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_dcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void 
(*column_major_ccopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*column_major_ccopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_zcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*column_major_zcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_sdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_ddot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_dsdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_cdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_zdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_cdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_zdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void 
(*column_major_isamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_idamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_icamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_izamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_isamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_idamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_icamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_izamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_scnrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_dznrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_snrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_dnrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_srot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void (*column_major_sdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_ddot_sycl)(sycl::queue& queue, std::int64_t 
n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_dsdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_cdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_zdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_cdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_zdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_isamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_idamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_icamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_izamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_isamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_idamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_icamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_izamax_sycl)(sycl::queue& queue, 
std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_scnrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_dznrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_snrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_dnrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_srot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); - void (*column_major_drot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void (*column_major_drot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); - void (*column_major_csrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*column_major_csrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s); - void (*column_major_zdrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_zdrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s); - void (*column_major_srotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*column_major_drotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void 
(*column_major_crotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*column_major_zrotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*column_major_srotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - void (*column_major_drotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m); - void (*column_major_srotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - float y1, sycl::buffer ¶m); - void (*column_major_drotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m); - void (*column_major_sscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); - void (*column_major_cscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_csscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_zscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_zdscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_sdsdot_sycl)(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_sswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void 
(*column_major_srotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*column_major_drotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*column_major_crotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*column_major_zrotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*column_major_srotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*column_major_drotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); + void (*column_major_srotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + float y1, sycl::buffer& param); + void (*column_major_drotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + double y1, sycl::buffer& param); + void (*column_major_sscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); + void (*column_major_cscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_csscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_zscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_zdscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_sdsdot_sycl)(sycl::queue& queue, std::int64_t n, float 
sb, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_sswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_dswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_dswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_cswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_cswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_dgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_dgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - 
double beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_cgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t ku, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_cgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_dgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer 
&x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_cgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_cgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - float beta, sycl::buffer &y, + float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_dgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double 
alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - double beta, sycl::buffer &y, + double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*column_major_cgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*column_major_zgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t 
lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*column_major_cdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*column_major_zdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t 
batch_size); - void (*column_major_sger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_dger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_cgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*column_major_sger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_dger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_cgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_cgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_cgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_chbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_chbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_chemv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_chemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, 
std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhemv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_cher_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_cher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zher_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_cher2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_cher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zher2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex 
alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_chpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_chpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_chpr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_chpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - void (*column_major_zhpr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + void (*column_major_zhpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - void (*column_major_chpr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + void (*column_major_chpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*column_major_zhpr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*column_major_zhpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*column_major_ssbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*column_major_ssbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dsbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_dsbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_sspmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dspmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_sspr_sycl)(sycl::queue &queue, oneapi::math::uplo 
upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); - void (*column_major_dspr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); - void (*column_major_sspr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*column_major_dspr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); - void (*column_major_ssymv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_dsymv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_ssyr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - void (*column_major_dsyr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - void (*column_major_ssyr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_dsyr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer 
&a, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_sspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_dspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_sspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a); + void (*column_major_dspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a); + void (*column_major_sspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*column_major_dspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); + void (*column_major_ssymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_dsymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_ssyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& 
a, std::int64_t lda); + void (*column_major_dsyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda); + void (*column_major_ssyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_dsyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); - void (*column_major_stbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_stbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*column_major_dtbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*column_major_dtbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); - void (*column_major_ctbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_ctbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*column_major_dtbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*column_major_dtbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); - void (*column_major_ctbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_ctbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, 
std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, 
sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_strmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_strmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_ztrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_ztrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_strsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_strsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, 
oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_ztrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_ztrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_sgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*column_major_sgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_dgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_dgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_cgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_cgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_hgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_hgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); - void (*column_major_gemm_f16f16f32_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*column_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); - void (*column_major_gemm_bf16bf16f32_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void 
(*column_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, + sycl::buffer& a, std::int64_t lda, - sycl::buffer &b, + sycl::buffer& b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_chemm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_chemm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zhemm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zhemm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_cherk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_cherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zherk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zherk_sycl)(sycl::queue& queue, 
oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_cher2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_cher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + float beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*column_major_zher2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_zher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + double beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*column_major_ssymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*column_major_ssymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_dsymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, 
oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_csymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_csymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_ssyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_ssyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_dsyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, 
oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_csyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_csyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_ssyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_dsyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_ssyrk_batch_strided_sycl)( + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t 
k, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); + void (*column_major_dsyrk_batch_strided_sycl)( + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); void (*column_major_csyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zsyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_ssyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*column_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer 
&c, std::int64_t ldc); - void (*column_major_dsyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_csyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_csyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_strmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_strmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t 
m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_dtrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_dtrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_ctrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_ctrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_ztrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_ztrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_strsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_strsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, 
std::int64_t ldb); - void (*column_major_dtrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_dtrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_ctrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_ctrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_ztrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_ztrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); void (*column_major_sgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + 
std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_dgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_cgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_hgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + void (*column_major_hgemm_batch_strided_sycl)( + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); void (*column_major_gemm_f16f16f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size); void (*column_major_gemm_s8s8f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_gemm_s8s8s32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_strsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, 
std::int64_t stride_b, std::int64_t batch_size); void (*column_major_dtrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_ctrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_ztrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*column_major_sgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void 
(*column_major_sgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_dgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_cgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_cgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex 
beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); void (*column_major_gemm_s8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_gemm_s8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_gemm_u8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& 
co); void (*column_major_gemm_u8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_somatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_domatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_comatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t 
m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_zomatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*column_major_simatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_simatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_dimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_cimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_zimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t 
n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); void (*column_major_somatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_domatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_comatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, 
- std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zomatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_somatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_somatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_domatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_domatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_comatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_comatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_zomatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_somatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_domatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_comatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_zomatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_simatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_simatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_dimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_cimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_zimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*column_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_somatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*column_major_somatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, 
float beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); - void (*column_major_domatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*column_major_domatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); - void (*column_major_comatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*column_major_comatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zomatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zomatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs - sycl::event (*column_major_scasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*column_major_dzasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - 
const std::vector &dependencies); - sycl::event (*column_major_sasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*column_major_dasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*column_major_saxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, + sycl::event (*column_major_scasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_dzasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_sasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*column_major_dasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*column_major_saxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_daxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, + const std::vector& dependencies); + sycl::event (*column_major_daxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_caxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event (*column_major_caxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex 
alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zaxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zaxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*column_major_saxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_daxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_caxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, 
std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zaxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_saxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_daxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_caxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, std::complex alpha, const std::complex* x, + std::int64_t 
incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_zaxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*column_major_saxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_daxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, - const double beta, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_caxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event (*column_major_saxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_daxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, + const double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_caxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event 
(*column_major_zaxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zaxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_scopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_ccopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_scopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_ccopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t 
incy, + const std::vector& dependencies); sycl::event (*column_major_scopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ccopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_zcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_scopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, 
const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_dcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_ccopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*column_major_sdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies); - sycl::event (*column_major_ddot_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t 
incy, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_dsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_cdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_zdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_cdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_zdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_isamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_idamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_icamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_izamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_isamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const 
float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_idamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_icamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_izamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_scnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*column_major_dznrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_snrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*column_major_dnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*column_major_srot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*column_major_sdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_ddot_usm_sycl)(sycl::queue& queue, 
std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_dsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_cdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_zdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_cdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_zdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_isamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_idamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_icamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_izamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + 
sycl::event (*column_major_isamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_idamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_icamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_izamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_scnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_dznrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_snrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*column_major_dnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*column_major_srot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, - const std::vector &dependencies); - sycl::event (*column_major_drot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, + const std::vector& dependencies); + sycl::event (*column_major_drot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double 
c, double s, - const std::vector &dependencies); - sycl::event (*column_major_csrot_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_csrot_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, - const std::vector &dependencies); - sycl::event (*column_major_zdrot_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zdrot_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies); - sycl::event (*column_major_srotg_usm_sycl)(sycl::queue &queue, float *a, float *b, float *c, - float *s, - const std::vector &dependencies); - sycl::event (*column_major_drotg_usm_sycl)(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies); - sycl::event (*column_major_crotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*column_major_zrotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*column_major_srotm_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies); - sycl::event (*column_major_drotm_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies); - sycl::event (*column_major_srotmg_usm_sycl)(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies); - sycl::event 
(*column_major_drotmg_usm_sycl)(sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies); - sycl::event (*column_major_sscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_cscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*column_major_srotg_usm_sycl)(sycl::queue& queue, float* a, float* b, float* c, + float* s, + const std::vector& dependencies); + sycl::event (*column_major_drotg_usm_sycl)(sycl::queue& queue, double* a, double* b, double* c, + double* s, + const std::vector& dependencies); + sycl::event (*column_major_crotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*column_major_zrotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*column_major_srotm_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + float* param, + const std::vector& dependencies); + sycl::event (*column_major_drotm_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + double* param, + const std::vector& dependencies); + sycl::event (*column_major_srotmg_usm_sycl)(sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, + const std::vector& dependencies); + sycl::event (*column_major_drotmg_usm_sycl)(sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies); + sycl::event 
(*column_major_sscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_cscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_csscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*column_major_csscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_zscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_zdscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_sdsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies); - sycl::event (*column_major_sswap_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dswap_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_cswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zswap_usm_sycl)(sycl::queue &queue, std::int64_t 
n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_zscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_zdscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_sdsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies); + sycl::event (*column_major_sswap_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dswap_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_cswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t ku, float alpha, 
const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); + std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*column_major_cgbmv_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue &queue, 
oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemv_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_zgemv_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_sgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_dgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - 
std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex beta, std::complex *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_sgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_dgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t 
*m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_cgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_zgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t 
group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_sdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_ddgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + 
const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_zdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_sdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ddgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* 
lda, const double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_cdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_zdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); - sycl::event (*column_major_sger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); + sycl::event (*column_major_sger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dger_usm_sycl)(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, + const std::vector& dependencies); + sycl::event (*column_major_dger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*column_major_cgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_cgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, 
+ const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); sycl::event (*column_major_chbmv_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_zhbmv_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*column_major_chemv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*column_major_chemv_usm_sycl)(sycl::queue& queue, 
oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_cher_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_cher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zher_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cher2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + 
sycl::event (*column_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zher2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_chpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t 
incy, - const std::vector &dependencies); - sycl::event (*column_major_chpr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_chpr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + 
std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sspr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies); - sycl::event (*column_major_dspr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies); - sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue &queue, 
oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, - const std::vector &dependencies); - sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies); - sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies); - 
sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, + const std::vector& dependencies); + sycl::event (*column_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, + const std::vector& dependencies); + sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, + const std::vector& dependencies); + sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies); + sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* 
a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue 
&queue, oneapi::math::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector 
&dependencies); - sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event 
(*column_major_ctpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_stpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event 
(*column_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_strmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const 
std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_strsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - 
const std::vector &dependencies); - sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, + const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::vector& dependencies); + sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemm_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_zgemm_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies); + const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t 
ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*column_major_gemm_bf16bf16f32_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::math::bfloat16 *a, - std::int64_t lda, const oneapi::math::bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_chemm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, 
std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_cherk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_zherk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, - double beta, std::complex *c, + const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cher2k_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t 
ldb, float beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); sycl::event (*column_major_zher2k_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_csymm_usm_sycl)( - 
sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, float beta, float *c, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - 
std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_ssyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); 
sycl::event (*column_major_dsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_csyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, std::complex* 
beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_ssyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_csyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, 
std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_zsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, 
std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_csyr2k_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_strmm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + const std::vector& dependencies); + sycl::event (*column_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, 
std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_ctrmm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*column_major_ztrmm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); - sycl::event (*column_major_strsm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); + sycl::event (*column_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, 
float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_ctrsm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*column_major_ztrsm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event 
(*column_major_strsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_dtrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_ctrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event 
(*column_major_ztrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_strsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dtrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, 
+ oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ctrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_ztrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_sgemm_batch_group_usm_sycl)( - sycl::queue &queue, 
oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_cgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* 
transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_hgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, 
+ const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - 
std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_sgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const 
double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_hgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose 
transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t 
batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float 
beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_cgemmt_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_zgemmt_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8u8s32_bias_usm_sycl)( - sycl::queue &queue, 
oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_u8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); 
+ const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_u8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_somatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_domatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, 
- const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_comatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zomatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_simatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, 
std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_somatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, 
oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_domatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_comatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event 
(*column_major_zomatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); - sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); + sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float 
alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_somatcopy2_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, - std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, float* b, + std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies); sycl::event (*column_major_domatcopy2_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, - std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies); - sycl::event (*column_major_comatcopy2_usm_sycl)(sycl::queue &queue, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, double* b, + std::int64_t ldb, 
std::int64_t strideb, const std::vector& dependencies); + sycl::event (*column_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*column_major_zomatcopy2_usm_sycl)(sycl::queue &queue, + const std::vector& dependencies); + sycl::event (*column_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - std::complex *ab, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_dimatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, 
std::int64_t m, + std::int64_t n, double alpha, double* ab, + std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - std::complex *ab, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::vector& dependencies); + sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue& queue, + oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + 
std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_comatadd_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zomatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, - oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zomatadd_usm_sycl)( + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_somatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, 
float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_domatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_comatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_zomatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event 
(*column_major_simatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_dimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_cimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_zimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, 
oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); // Buffer APIs - void (*row_major_scasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_dzasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_sasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_dasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_saxpy_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*row_major_daxpy_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*row_major_caxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zaxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_saxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_scasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_dzasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_sasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& 
result); + void (*row_major_dasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_saxpy_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*row_major_daxpy_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*row_major_caxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zaxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_saxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_daxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_daxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_caxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*row_major_caxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_zaxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*row_major_zaxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_saxpby_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_daxpby_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_caxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + void (*row_major_saxpby_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_daxpby_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_caxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zaxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + void (*row_major_zaxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_scopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_dcopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_ccopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 
1> &y, std::int64_t incy); - void (*row_major_zcopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_scopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_scopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_dcopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_ccopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zcopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_scopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_dcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_dcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_ccopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*row_major_ccopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void 
(*row_major_zcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*row_major_zcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_sdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_ddot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_dsdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_cdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_zdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_cdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_zdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_isamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_idamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_icamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void 
(*row_major_izamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_isamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_idamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_icamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_izamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_scnrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_dznrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_snrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_dnrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_srot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, + void (*row_major_sdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_ddot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_dsdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_cdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + 
sycl::buffer, 1>& result); + void (*row_major_zdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_cdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_zdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_isamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_idamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_icamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_izamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_isamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_idamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_icamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_izamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_scnrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_dznrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_snrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, 
sycl::buffer& result); + void (*row_major_dnrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_srot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); - void (*row_major_drot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void (*row_major_drot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); - void (*row_major_csrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*row_major_csrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s); - void (*row_major_zdrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*row_major_zdrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s); - void (*row_major_srotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*row_major_drotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*row_major_crotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); - void (*row_major_zrotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*row_major_srotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - void (*row_major_drotm_sycl)(sycl::queue &queue, std::int64_t n, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - void (*row_major_srotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m); - void (*row_major_drotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m); - void (*row_major_sscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); - void (*row_major_cscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_csscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_zscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_zdscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_sdsdot_sycl)(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_sswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_dswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_cswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + void 
(*row_major_srotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*row_major_drotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*row_major_crotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); + void (*row_major_zrotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*row_major_srotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*row_major_drotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*row_major_srotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); + void (*row_major_drotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + double y1, sycl::buffer& param); + void (*row_major_sscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); + void (*row_major_cscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_csscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_zscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_zdscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_sdsdot_sycl)(sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, + 
sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_sswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_dswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_cswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_dgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_dgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_cgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_cgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, 
std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zgbmv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zgbmv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_dgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_cgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_dgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_cgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - 
std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zgemv_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, + void (*row_major_zgemv_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_dgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*row_major_cgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> 
&a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*row_major_zgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + 
sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*row_major_cdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*row_major_zdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*row_major_sger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_cgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex 
alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + void (*row_major_sger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_cgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*row_major_zgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_cgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*row_major_cgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*row_major_zgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_chbmv_sycl)(sycl::queue &queue, 
oneapi::math::uplo upper_lower, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*row_major_chbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zhbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, + void (*row_major_zhbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_chemv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_chemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zhemv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zhemv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, 
std::int64_t incy); - void (*row_major_cher_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_cher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zher_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + void (*row_major_zher_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_cher2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + void (*row_major_cher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zher2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, + void (*row_major_zher2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_chpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void 
(*row_major_chpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zhpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, + void (*row_major_zhpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_chpr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - void (*row_major_zhpr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - void (*row_major_chpr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - void (*row_major_zhpr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_chpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); + void (*row_major_zhpr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); + void (*row_major_chpr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a); + void (*row_major_zhpr2_sycl)(sycl::queue& 
queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*row_major_ssbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_dsbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_sspmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*row_major_ssbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_dsbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_sspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); - void (*row_major_dspmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_sspr_sycl)(sycl::queue &queue, 
oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - void (*row_major_dspr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - void (*row_major_sspr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*row_major_dspr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*row_major_ssymv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_dsymv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_ssyr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dsyr_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - void (*row_major_ssyr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dsyr2_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, 
std::int64_t lda); - void (*row_major_stbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_dspmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_sspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + void (*row_major_dspr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + void (*row_major_sspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*row_major_dspr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*row_major_ssymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_dsymv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_ssyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dsyr_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + void (*row_major_ssyr2_sycl)(sycl::queue& queue, 
oneapi::math::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dsyr2_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_stbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_dtbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_dtbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_ctbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_ctbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztbmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztbmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void 
(*row_major_stbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_dtbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_dtbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_ctbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_ctbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztbsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztbsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_stpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, 
oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztpmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztpmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_stpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer 
&a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztpsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztpsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_strmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_strmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - 
std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_ztrmv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_ztrmv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_strsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_strsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_ztrsv_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_ztrsv_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag 
unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_sgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*row_major_sgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_dgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_dgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_cgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_cgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*row_major_zgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, 
std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_hgemm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_hgemm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_gemm_f16f16f32_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, + sycl::buffer& a, std::int64_t lda, - sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, + sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); - void (*row_major_chemm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void 
(*row_major_chemm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zhemm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zhemm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_cherk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_cherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zherk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zherk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_cher2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_cher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, 
std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + float beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zher2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_zher2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + double beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_ssymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*row_major_ssymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_csymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_csymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, 
std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zsymm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zsymm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_ssyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_ssyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_csyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_csyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, 
sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zsyrk_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zsyrk_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_csyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, 
std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zsyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_ssyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_csyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_csyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> 
&a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zsyr2k_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_strmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_strmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_dtrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_dtrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_ctrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_ctrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); - void (*row_major_ztrmm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*row_major_ztrmm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_strsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_strsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_dtrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_dtrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_ctrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_ctrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, 
sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); - void (*row_major_ztrsm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + void (*row_major_ztrsm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_sgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_sgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_dgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*row_major_dgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_cgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_hgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*row_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_f16f16f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, 
- std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_s8s8f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_s8s8s32_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void 
(*row_major_strsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_dtrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_ctrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_ztrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, 
oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*row_major_sgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_sgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_cgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_cgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, 
sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zgemmt_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + void (*row_major_zgemmt_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); void (*row_major_gemm_s8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_s8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_u8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + 
sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_u8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, + void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, + 
sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_comatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_zomatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, double alpha, + 
sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &ab, + void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &ab, + void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); void (*row_major_somatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_domatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - 
std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_comatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zomatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_somatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_somatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, 
std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_domatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_domatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_comatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_comatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_zomatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_somatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_domatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, 
std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_comatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_zomatcopy2_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_simatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_simatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_dimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_cimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t 
ldb); - void (*row_major_zimatcopy_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + void (*row_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_somatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + void (*row_major_somatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_domatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_domatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_comatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_comatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zomatadd_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); + void 
(*row_major_zomatadd_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs - sycl::event (*row_major_scasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_dzasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_sasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*row_major_dasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*row_major_saxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, + sycl::event (*row_major_scasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_dzasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_sasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*row_major_dasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event 
(*row_major_saxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_daxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, + const std::vector& dependencies); + sycl::event (*row_major_daxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_caxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, + const std::vector& dependencies); + sycl::event (*row_major_caxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zaxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zaxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_saxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_daxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, double *alpha, 
const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_caxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_zaxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_saxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_daxpy_batch_strided_usm_sycl)( - sycl::queue &queue, 
std::int64_t n, double alpha, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_caxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_zaxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_saxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_daxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_caxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event 
(*row_major_saxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_daxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_caxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zaxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zaxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_scopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_ccopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event 
(*row_major_scopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_ccopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_zcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_scopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ccopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + 
std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_scopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_dcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ccopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + 
std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*row_major_sdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_ddot_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_dsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_cdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_zdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_cdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_zdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_isamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const 
std::vector &dependencies); - sycl::event (*row_major_idamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_icamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_izamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_isamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_idamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_icamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_izamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_scnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_dznrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_snrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*row_major_dnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event 
(*row_major_srot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, - float s, const std::vector &dependencies); - sycl::event (*row_major_drot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, - double s, const std::vector &dependencies); - sycl::event (*row_major_csrot_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, - float s, const std::vector &dependencies); - sycl::event (*row_major_zdrot_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*row_major_sdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_ddot_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_dsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_cdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_zdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const 
std::vector& dependencies); + sycl::event (*row_major_cdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_zdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_isamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_idamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_icamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_izamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_isamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_idamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_icamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_izamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_scnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const 
std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_dznrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_snrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*row_major_dnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*row_major_srot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, + float s, const std::vector& dependencies); + sycl::event (*row_major_drot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, + double s, const std::vector& dependencies); + sycl::event (*row_major_csrot_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, + float s, const std::vector& dependencies); + sycl::event (*row_major_zdrot_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies); - sycl::event (*row_major_srotg_usm_sycl)(sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector &dependencies); - sycl::event (*row_major_drotg_usm_sycl)(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies); - sycl::event (*row_major_crotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*row_major_zrotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, - std::complex *s, - const std::vector 
&dependencies); - sycl::event (*row_major_srotm_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies); - sycl::event (*row_major_drotm_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies); - sycl::event (*row_major_srotmg_usm_sycl)(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies); - sycl::event (*row_major_drotmg_usm_sycl)(sycl::queue &queue, double *d1, double *d2, double *x1, - double y1, double *param, - const std::vector &dependencies); - sycl::event (*row_major_sscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_cscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*row_major_srotg_usm_sycl)(sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies); + sycl::event (*row_major_drotg_usm_sycl)(sycl::queue& queue, double* a, double* b, double* c, + double* s, + const std::vector& dependencies); + sycl::event (*row_major_crotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*row_major_zrotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*row_major_srotm_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + float* param, + const std::vector& dependencies); + 
sycl::event (*row_major_drotm_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + double* param, + const std::vector& dependencies); + sycl::event (*row_major_srotmg_usm_sycl)(sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, + const std::vector& dependencies); + sycl::event (*row_major_drotmg_usm_sycl)(sycl::queue& queue, double* d1, double* d2, double* x1, + double y1, double* param, + const std::vector& dependencies); + sycl::event (*row_major_sscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_cscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_csscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*row_major_csscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_zscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_zdscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_sdsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies); - sycl::event (*row_major_sswap_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, 
std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dswap_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_zdscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_sdsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies); + sycl::event (*row_major_sswap_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dswap_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_cswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_zswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const 
std::vector& dependencies); + sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); + std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_cgbmv_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, 
std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_zgemv_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*row_major_sgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_dgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector 
&dependencies); + const std::vector& dependencies); sycl::event (*row_major_cgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex beta, std::complex *y, std::int64_t incy, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_sgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& 
queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_dgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_cgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex 
*beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_sdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ddgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_cdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const 
std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_zdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_sdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ddgmm_batch_group_usm_sycl)( - sycl::queue &queue, 
oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_cdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); - sycl::event (*row_major_sger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, + sycl::queue& queue, oneapi::math::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, 
+ std::int64_t* group_size, const std::vector& dependencies); + sycl::event (*row_major_sger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, + const std::vector& dependencies); + sycl::event (*row_major_dger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, + const std::vector& dependencies); + sycl::event (*row_major_cgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, + const std::complex* x, std::int64_t incx, + const 
std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_cgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_zhbmv_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*row_major_chemv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::uplo 
upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*row_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cher_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_cher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zher_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_zher_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - 
std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cher2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zher2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, 
std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_chpr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - 
std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, + const std::vector& dependencies); + sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event 
(*row_major_sspr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies); - sycl::event (*row_major_dspr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies); - sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies); - sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *a, - const std::vector &dependencies); - sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - 
std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dsyr2_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, + const std::vector& dependencies); + sycl::event (*row_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, + const std::vector& dependencies); + sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies); + sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* a, + const std::vector& dependencies); + sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& 
dependencies); + sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t 
lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - 
std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, double *x, 
std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event 
(*row_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_strmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_strsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event 
(*row_major_ztrsv_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, + const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::vector& dependencies); + sycl::event (*row_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::vector& dependencies); + sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - 
std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_zgemm_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies); + const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, 
oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*row_major_gemm_bf16bf16f32_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::math::bfloat16 *a, - std::int64_t lda, const oneapi::math::bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const oneapi::math::bfloat16* a, std::int64_t lda, const oneapi::math::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_chemm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, 
std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_cherk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zherk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const 
std::vector& dependencies); + sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_csymm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, 
std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, float beta, float *c, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, - const std::vector 
&dependencies); - sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_ssyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_dsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - double 
*beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_csyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_zsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *upper_lower, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* upper_lower, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_ssyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, 
oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_dsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_csyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_zsyrk_batch_strided_usm_sycl)( 
- sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_csyr2k_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, - std::int64_t n, std::int64_t k, std::complex 
alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_strmm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + const std::vector& dependencies); + sycl::event (*row_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const 
double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*row_major_ctrmm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_ztrmm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); - sycl::event (*row_major_strsm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); + sycl::event (*row_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue &queue, oneapi::math::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const 
std::vector& dependencies); + sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*row_major_ctrsm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_ztrsm_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_strsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, 
std::int64_t lda, std::int64_t stride_a, float *b, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_dtrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_ctrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ztrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::side left_right, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, oneapi::math::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, 
std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_strsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dtrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ctrsm_batch_group_usm_sycl)( - sycl::queue &queue, 
oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_ztrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::side *left_right, oneapi::math::uplo *upper_lower, - oneapi::math::transpose *trans, oneapi::math::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::side* left_right, oneapi::math::uplo* upper_lower, + oneapi::math::transpose* trans, oneapi::math::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_sgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, 
oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_cgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event 
(*row_major_zgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_hgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - 
std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *transa, oneapi::math::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* transa, oneapi::math::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, 
+ std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_sgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_dgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_cgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, 
oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_hgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, 
std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t 
stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue &queue, oneapi::math::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, 
- const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_cgemmt_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_zgemmt_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, + sycl::queue& queue, oneapi::math::uplo upper_lower, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t 
ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_u8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_u8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + sycl::queue& queue, 
oneapi::math::transpose transa, oneapi::math::transpose transb, oneapi::math::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_somatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_domatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_comatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const 
std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zomatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_simatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_dimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& 
dependencies); sycl::event (*row_major_cimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_somatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_domatadd_batch_strided_usm_sycl)( - sycl::queue &queue, 
oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_comatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_zomatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, 
- const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - 
std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, + const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, + const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const 
std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float alpha, - float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double alpha, - double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_zimatcopy_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + const 
std::vector& dependencies); + sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_comatadd_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose transa, oneapi::math::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue &queue, oneapi::math::transpose transa, + sycl::queue& queue, oneapi::math::transpose transa, oneapi::math::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::math::transpose 
transa, oneapi::math::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, + const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_somatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_domatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_comatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, 
std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_zomatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_simatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_dimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_cimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, 
oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_zimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); } blas_function_table_t; diff --git a/src/dft/backends/backend_backward_instantiations.cxx b/src/dft/backends/backend_backward_instantiations.cxx index eab281a96..e475ff55c 100644 --- a/src/dft/backends/backend_backward_instantiations.cxx +++ b/src/dft/backends/backend_backward_instantiations.cxx @@ -25,29 +25,29 @@ using desc_rd_t = dft::detail::descriptor; using desc_cd_t = dft::detail::descriptor; -using depends_vec_t = const std::vector &; +using depends_vec_t = const std::vector&; -#define ONEMATH_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ +#define ONEMATH_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ /* Buffer API */ \ - template ONEMATH_EXPORT void compute_backward(DESCRIPTOR_T &, \ - sycl::buffer &); \ - template ONEMATH_EXPORT void compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMATH_EXPORT void 
compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMATH_EXPORT void compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &, \ - sycl::buffer &, sycl::buffer &); \ + template ONEMATH_EXPORT void compute_backward(DESCRIPTOR_T&, \ + sycl::buffer&); \ + template ONEMATH_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMATH_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMATH_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&, sycl::buffer&, \ + sycl::buffer&); \ \ /* USM API */ \ - template ONEMATH_EXPORT sycl::event compute_backward(DESCRIPTOR_T &, FORWARD_T *, \ - depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_backward(DESCRIPTOR_T &, SCALAR_T *, \ - SCALAR_T *, depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_backward( \ - DESCRIPTOR_T &, BACKWARD_T *, FORWARD_T *, depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_backward( \ - DESCRIPTOR_T &, SCALAR_T *, SCALAR_T *, SCALAR_T *, SCALAR_T *, depends_vec_t); + template ONEMATH_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, FORWARD_T*, \ + depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, SCALAR_T*, \ + SCALAR_T*, depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, BACKWARD_T*, \ + FORWARD_T*, depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_backward( \ + DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t); ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex) ONEMATH_DFT_BACKWARD_INSTANTIATIONS(desc_cf_t, float, std::complex, std::complex) diff --git a/src/dft/backends/backend_compute_signature.cxx b/src/dft/backends/backend_compute_signature.cxx index 0a154eeb6..71fcd793a 100644 --- a/src/dft/backends/backend_compute_signature.cxx +++ 
b/src/dft/backends/backend_compute_signature.cxx @@ -63,7 +63,8 @@ void forward_op_cc(descriptor_type& desc, sycl::buffer& in, sycl::event forward_op_cc(descriptor_type& desc, fwd_type* in, bwd_type* out, const std::vector& dependencies) override { dft::detail::get_commit(desc)->template compute_call_throw("compute_forward"); - return oneapi::math::dft::BACKEND::compute_forward(desc, in, out, dependencies); + return oneapi::math::dft::BACKEND::compute_forward(desc, in, out, + dependencies); } // forward out-of-place REAL_REAL @@ -79,7 +80,7 @@ sycl::event forward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_type const std::vector& dependencies) override { dft::detail::get_commit(desc)->template compute_call_throw("compute_forward"); return oneapi::math::dft::BACKEND::compute_forward(desc, in_re, in_im, out_re, out_im, - dependencies); + dependencies); } // backward inplace COMPLEX_COMPLEX @@ -133,5 +134,5 @@ sycl::event backward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_typ const std::vector& dependencies) override { dft::detail::get_commit(desc)->template compute_call_throw("compute_backward"); return oneapi::math::dft::BACKEND::compute_backward(desc, in_re, in_im, out_re, out_im, - dependencies); + dependencies); } diff --git a/src/dft/backends/backend_forward_instantiations.cxx b/src/dft/backends/backend_forward_instantiations.cxx index fc4084633..17ac748f4 100644 --- a/src/dft/backends/backend_forward_instantiations.cxx +++ b/src/dft/backends/backend_forward_instantiations.cxx @@ -25,29 +25,29 @@ using desc_rd_t = dft::detail::descriptor; using desc_cd_t = dft::detail::descriptor; -using depends_vec_t = const std::vector &; +using depends_vec_t = const std::vector&; -#define ONEMATH_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ +#define ONEMATH_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ /* Buffer API */ \ - template ONEMATH_EXPORT void compute_forward(DESCRIPTOR_T &, \ - 
sycl::buffer &); \ - template ONEMATH_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMATH_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMATH_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &, \ - sycl::buffer &, sycl::buffer &); \ + template ONEMATH_EXPORT void compute_forward(DESCRIPTOR_T&, \ + sycl::buffer&); \ + template ONEMATH_EXPORT void compute_forward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMATH_EXPORT void compute_forward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMATH_EXPORT void compute_forward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&, sycl::buffer&, \ + sycl::buffer&); \ \ /* USM API */ \ - template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, FORWARD_T *, \ - depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, SCALAR_T *, \ - SCALAR_T *, depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, FORWARD_T *, \ - BACKWARD_T *, depends_vec_t); \ - template ONEMATH_EXPORT sycl::event compute_forward( \ - DESCRIPTOR_T &, SCALAR_T *, SCALAR_T *, SCALAR_T *, SCALAR_T *, depends_vec_t); + template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, FORWARD_T*, \ + depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, SCALAR_T*, \ + SCALAR_T*, depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, FORWARD_T*, \ + BACKWARD_T*, depends_vec_t); \ + template ONEMATH_EXPORT sycl::event compute_forward( \ + DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t); ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex) ONEMATH_DFT_FORWARD_INSTANTIATIONS(desc_cf_t, float, std::complex, std::complex) diff --git a/src/dft/backends/cufft/backward.cpp b/src/dft/backends/cufft/backward.cpp index 
4dfff0c8b..475f1ea49 100644 --- a/src/dft/backends/cufft/backward.cpp +++ b/src/dft/backends/cufft/backward.cpp @@ -38,19 +38,19 @@ namespace oneapi::math::dft::cufft { namespace detail { //forward declaration template -std::array get_offsets_bwd(dft::detail::commit_impl *commit); +std::array get_offsets_bwd(dft::detail::commit_impl* commit); template -cufftHandle get_bwd_plan(dft::detail::commit_impl *commit) { - return static_cast *>(commit->get_handle())[1].value(); +cufftHandle get_bwd_plan(dft::detail::commit_impl* commit) { + return static_cast*>(commit->get_handle())[1].value(); } } // namespace detail // BUFFER version //In-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_backward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -68,35 +68,35 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, } } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto inout_native = reinterpret_cast *>( + auto inout_native = reinterpret_cast*>( ih.get_native_mem(inout_acc)); detail::cufft_execute>( - func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), - reinterpret_cast(inout_native + offsets[1])); + func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), + reinterpret_cast(inout_native + offsets[1])); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMATH_EXPORT void 
compute_backward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::math::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)", - "cuFFT does not support real-real complex storage."); + "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { const std::string func_name = "compute_backward(desc, in, out)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -113,7 +113,7 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, } } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -121,12 +121,12 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto in_native = reinterpret_cast( - reinterpret_cast *>( + auto in_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>( + auto out_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(out_acc)) + offsets[1]); detail::cufft_execute>( @@ -137,20 +137,20 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMATH_EXPORT void compute_backward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&, + 
sycl::buffer, 1>&) { throw oneapi::math::unimplemented("DFT", "compute_backward(desc, in_re, in_im, out_re, out_im)", - "cuFFT does not support real-real complex storage."); + "cuFFT does not support real-real complex storage."); } //USM version //In-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { const std::string func_name = "compute_backward(desc, inout, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -168,7 +168,7 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -185,19 +185,19 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &, scalar *, - scalar *, - const std::vector &) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type&, scalar*, + scalar*, + const std::vector&) { throw oneapi::math::unimplemented("DFT", - "compute_backward(desc, inout_re, inout_im, dependencies)", - "cuFFT does not support real-real complex storage."); + "compute_backward(desc, inout_re, inout_im, dependencies)", + "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies) { const std::string func_name = "compute_backward(desc, in, out, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -214,7 +214,7 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -231,13 +231,13 @@ ONEMATH_EXPORT sycl::event 
compute_backward(descriptor_type &desc, bwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &, scalar *, - scalar *, scalar *, - scalar *, - const std::vector &) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type&, scalar*, + scalar*, scalar*, + scalar*, + const std::vector&) { throw oneapi::math::unimplemented("DFT", - "compute_backward(desc, in_re, in_im, out_re, out_im, deps)", - "cuFFT does not support real-real complex storage."); + "compute_backward(desc, in_re, in_im, out_re, out_im, deps)", + "cuFFT does not support real-real complex storage."); } // Template function instantiations diff --git a/src/dft/backends/cufft/commit.cpp b/src/dft/backends/cufft/commit.cpp index 61be17e1f..b6d2164ff 100644 --- a/src/dft/backends/cufft/commit.cpp +++ b/src/dft/backends/cufft/commit.cpp @@ -57,7 +57,7 @@ class cufft_commit final : public dft::detail::commit_impl { public: cufft_commit(sycl::queue& queue, const dft::detail::dft_values& config_values) : oneapi::math::dft::detail::commit_impl(queue, backend::cufft, - config_values) { + config_values) { if constexpr (prec == dft::detail::precision::DOUBLE) { if (!queue.get_device().has(sycl::aspect::fp64)) { throw math::exception("DFT", "commit", "Device does not support double precision."); @@ -70,14 +70,14 @@ class cufft_commit final : public dft::detail::commit_impl { if (plans[0]) { if (cufftDestroy(plans[0].value()) != CUFFT_SUCCESS) { throw math::exception("dft/backends/cufft", __FUNCTION__, - "Failed to destroy forward cuFFT plan."); + "Failed to destroy forward cuFFT plan."); } plans[0] = std::nullopt; } if (plans[1]) { if (cufftDestroy(plans[1].value()) != CUFFT_SUCCESS) { throw math::exception("dft/backends/cufft", __FUNCTION__, - "Failed to destroy backward cuFFT plan."); + "Failed to destroy backward cuFFT plan."); } plans[1] = std::nullopt; } @@ -88,7 +88,7 @@ class cufft_commit final : public dft::detail::commit_impl { CUcontext interopContext; if 
(cuDevicePrimaryCtxRetain(&interopContext, interopDevice) != CUDA_SUCCESS) { throw math::exception("dft/backends/cufft", __FUNCTION__, - "Failed to change cuda context."); + "Failed to change cuda context."); } } } @@ -277,7 +277,7 @@ class cufft_commit final : public dft::detail::commit_impl { if (res != CUFFT_SUCCESS) { throw math::exception("dft/backends/cufft", __FUNCTION__, - "Failed to create forward cuFFT plan."); + "Failed to create forward cuFFT plan."); } plans[0] = fwd_plan; @@ -305,7 +305,7 @@ class cufft_commit final : public dft::detail::commit_impl { ); if (res != CUFFT_SUCCESS) { throw math::exception("dft/backends/cufft", __FUNCTION__, - "Failed to create backward cuFFT plan."); + "Failed to create backward cuFFT plan."); } plans[1] = bwd_plan; } @@ -321,7 +321,7 @@ class cufft_commit final : public dft::detail::commit_impl { auto res = cufftSetAutoAllocation(handle, 0); if (res != CUFFT_SUCCESS) { throw math::exception("dft/backends/cufft", "commit", - "cufftSetAutoAllocation(plan, 0) failed."); + "cufftSetAutoAllocation(plan, 0) failed."); } } } diff --git a/src/dft/backends/cufft/execute_helper.hpp b/src/dft/backends/cufft/execute_helper.hpp index 877136bfe..da485fea2 100644 --- a/src/dft/backends/cufft/execute_helper.hpp +++ b/src/dft/backends/cufft/execute_helper.hpp @@ -37,12 +37,12 @@ namespace oneapi::math::dft::cufft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::cufft) { throw math::invalid_argument("dft/backends/cufft", "get_commit", - "DFT descriptor has not been commited for cuFFT"); + "DFT descriptor has not been commited for cuFFT"); } return commit_handle; } @@ -50,7 +50,7 @@ inline dft::detail::commit_impl *checked_get_commit( /// Throw an math::invalid_argument 
if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -61,8 +61,8 @@ inline auto expect_config(DescT &desc, const char *message) { enum class Direction { Forward = CUFFT_FORWARD, Backward = CUFFT_INVERSE }; template -void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, void *input, - void *output) { +void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, void* input, + void* output) { constexpr bool is_real = std::is_floating_point_v; using single_type = std::conditional_t>; constexpr bool is_single = std::is_same_v; @@ -70,37 +70,41 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v if constexpr (is_real) { if constexpr (dir == Direction::Forward) { if constexpr (is_single) { - auto result = cufftExecR2C(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecR2C(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { - throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecR2C returned " + std::to_string(result)); + throw oneapi::math::exception( + "dft/backends/cufft", func, + "cufftExecR2C returned " + std::to_string(result)); } } else { - auto result = cufftExecD2Z(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecD2Z(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { - throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecD2Z returned " + std::to_string(result)); + throw oneapi::math::exception( + "dft/backends/cufft", func, + "cufftExecD2Z returned " + std::to_string(result)); } } } else { if constexpr (is_single) { - auto result = cufftExecC2R(plan, 
reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecC2R(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { - throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecC2R returned " + std::to_string(result)); + throw oneapi::math::exception( + "dft/backends/cufft", func, + "cufftExecC2R returned " + std::to_string(result)); } } else { - auto result = cufftExecZ2D(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecZ2D(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { - throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecZ2D returned " + std::to_string(result)); + throw oneapi::math::exception( + "dft/backends/cufft", func, + "cufftExecZ2D returned " + std::to_string(result)); } } } @@ -108,20 +112,20 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v else { if constexpr (is_single) { auto result = - cufftExecC2C(plan, reinterpret_cast(input), - reinterpret_cast(output), static_cast(dir)); + cufftExecC2C(plan, reinterpret_cast(input), + reinterpret_cast(output), static_cast(dir)); if (result != CUFFT_SUCCESS) { throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecC2C returned " + std::to_string(result)); + "cufftExecC2C returned " + std::to_string(result)); } } else { auto result = - cufftExecZ2Z(plan, reinterpret_cast(input), - reinterpret_cast(output), static_cast(dir)); + cufftExecZ2Z(plan, reinterpret_cast(input), + reinterpret_cast(output), static_cast(dir)); if (result != CUFFT_SUCCESS) { throw oneapi::math::exception("dft/backends/cufft", func, - "cufftExecZ2Z returned " + std::to_string(result)); + "cufftExecZ2Z returned " + std::to_string(result)); } } } @@ -132,17 +136,17 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v auto result = cuStreamSynchronize(stream); if (result != CUDA_SUCCESS) { throw 
oneapi::math::exception("dft/backends/cufft", func, - "cuStreamSynchronize returned " + std::to_string(result)); + "cuStreamSynchronize returned " + std::to_string(result)); } #endif } -inline CUstream setup_stream(const std::string &func, sycl::interop_handle ih, cufftHandle plan) { +inline CUstream setup_stream(const std::string& func, sycl::interop_handle ih, cufftHandle plan) { auto stream = ih.get_native_queue(); auto result = cufftSetStream(plan, stream); if (result != CUFFT_SUCCESS) { throw oneapi::math::exception("dft/backends/cufft", func, - "cufftSetStream returned " + std::to_string(result)); + "cufftSetStream returned " + std::to_string(result)); } return stream; } diff --git a/src/dft/backends/cufft/forward.cpp b/src/dft/backends/cufft/forward.cpp index ca7b2bc62..6b2867b5f 100644 --- a/src/dft/backends/cufft/forward.cpp +++ b/src/dft/backends/cufft/forward.cpp @@ -40,11 +40,11 @@ namespace oneapi::math::dft::cufft { namespace detail { //forward declaration template -std::array get_offsets_fwd(dft::detail::commit_impl *commit); +std::array get_offsets_fwd(dft::detail::commit_impl* commit); template -cufftHandle get_fwd_plan(dft::detail::commit_impl *commit) { - return static_cast *>(commit->get_handle())[0].value(); +cufftHandle get_fwd_plan(dft::detail::commit_impl* commit) { + return static_cast*>(commit->get_handle())[0].value(); } } // namespace detail @@ -52,8 +52,8 @@ cufftHandle get_fwd_plan(dft::detail::commit_impl *commit) { //In-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_forward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -71,34 +71,35 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, offsets[1] *= 2; // offset is supplied in complex but we offset scalar pointer } - queue.submit([&](sycl::handler 
&cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto inout_native = reinterpret_cast *>( + auto inout_native = reinterpret_cast*>( ih.get_native_mem(inout_acc)); detail::cufft_execute>( - func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), - reinterpret_cast(inout_native + offsets[1])); + func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), + reinterpret_cast(inout_native + offsets[1])); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMATH_EXPORT void compute_forward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::math::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)", - "cuFFT does not support real-real complex storage."); + "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { const std::string func_name = "compute_forward(desc, in, out)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -115,7 +116,7 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -123,12 +124,12 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer( - reinterpret_cast *>( + auto in_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(in_acc)) + offsets[0]); 
- auto out_native = reinterpret_cast( - reinterpret_cast *>( + auto out_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(out_acc)) + offsets[1]); detail::cufft_execute>( @@ -139,20 +140,20 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMATH_EXPORT void compute_forward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMATH_EXPORT void compute_forward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::math::unimplemented("DFT", "compute_forward(desc, in_re, in_im, out_re, out_im)", - "cuFFT does not support real-real complex storage."); + "cuFFT does not support real-real complex storage."); } //USM version //In-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { const std::string func_name = "compute_forward(desc, inout, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -170,7 +171,7 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -187,19 +188,19 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &, scalar *, - scalar *, - const std::vector &) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type&, scalar*, + scalar*, + const std::vector&) { throw oneapi::math::unimplemented("DFT", - "compute_forward(desc, inout_re, inout_im, dependencies)", - "cuFFT does not support real-real complex storage."); + "compute_forward(desc, inout_re, inout_im, dependencies)", + "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMATH_EXPORT sycl::event 
compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { const std::string func_name = "compute_forward(desc, in, out, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -216,7 +217,7 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -233,10 +234,10 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &, scalar *, - scalar *, scalar *, - scalar *, - const std::vector &) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type&, scalar*, + scalar*, scalar*, + scalar*, + const std::vector&) { throw oneapi::math::unimplemented( "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im, dependencies)", "cuFFT does not support real-real complex storage."); diff --git a/src/dft/backends/descriptor.cpp b/src/dft/backends/descriptor.cpp index a12a0083d..9a3d12911 100644 --- a/src/dft/backends/descriptor.cpp +++ b/src/dft/backends/descriptor.cpp @@ -25,7 +25,7 @@ namespace oneapi::math::dft::detail { template -void descriptor::commit(sycl::queue &queue) { +void descriptor::commit(sycl::queue& queue) { if (!pimpl_ || pimpl_->get_queue() != queue) { if (pimpl_) { pimpl_->get_queue().wait(); @@ -34,9 +34,9 @@ void descriptor::commit(sycl::queue &queue) { } pimpl_->commit(values_); } -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); } //namespace oneapi::math::dft::detail diff --git 
a/src/dft/backends/mklcpu/backward.cpp b/src/dft/backends/mklcpu/backward.cpp index cf62db084..a210c247e 100644 --- a/src/dft/backends/mklcpu/backward.cpp +++ b/src/dft/backends/mklcpu/backward.cpp @@ -40,26 +40,26 @@ namespace detail { // BUFFER version // backward a MKLCPU DFT call to the backend, checking that the commit impl is valid. template -inline void check_bwd_commit(dft::descriptor &desc) { +inline void check_bwd_commit(dft::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) { throw math::invalid_argument("DFT", "computer_backward", - "DFT descriptor has not been commited for MKLCPU"); + "DFT descriptor has not been commited for MKLCPU"); } - auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); MKL_LONG commit_status{ DFTI_UNCOMMITTED }; DftiGetValue(mklcpu_desc[1], DFTI_COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { throw math::invalid_argument("DFT", "compute_backward", - "MKLCPU DFT descriptor was not successfully committed."); + "MKLCPU DFT descriptor was not successfully committed."); } } // Throw an math::invalid_argument if the runtime param in the descriptor does not match // the expected value. 
template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -68,26 +68,26 @@ inline auto expect_config(DescT &desc, const char *message) { } // convert the base commit class to derived cpu commit class template -auto get_buffer(commit_t *commit_handle) { - commit_derived_t *derived_commit = - static_cast *>(commit_handle); +auto get_buffer(commit_t* commit_handle) { + commit_derived_t* derived_commit = + static_cast*>(commit_handle); return derived_commit->get_handle_buffer(); } } // namespace detail //In-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inout_acc = inout.template get_access(cgh); detail::host_task(cgh, [=]() { @@ -104,20 +104,20 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); 
detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto re_acc = inout_re.template get_access(cgh); auto im_acc = inout_im.template get_access(cgh); @@ -136,26 +136,26 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); detail::host_task(cgh, [=]() { - auto in_ptr = const_cast *>(detail::acc_to_ptr(in_acc)); + auto in_ptr = const_cast*>(detail::acc_to_ptr(in_acc)); DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], in_ptr, detail::acc_to_ptr(out_acc)); if (status != DFTI_NO_ERROR) { @@ -169,22 +169,22 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) 
{ +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inre_acc = in_re.template get_access(cgh); auto inim_acc = in_im.template get_access(cgh); @@ -192,8 +192,8 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, auto outim_acc = out_im.template get_access(cgh); detail::host_task(cgh, [=]() { - auto inre_ptr = const_cast *>(detail::acc_to_ptr(inre_acc)); - auto inim_ptr = const_cast *>(detail::acc_to_ptr(inim_acc)); + auto inre_ptr = const_cast*>(detail::acc_to_ptr(inre_acc)); + auto inim_ptr = const_cast*>(detail::acc_to_ptr(inim_acc)); DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], inre_ptr, inim_ptr, detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc)); @@ -210,18 +210,18 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return 
cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); detail::host_task(cgh, [=]() { @@ -237,19 +237,20 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, + scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); detail::host_task(cgh, [=]() { @@ -265,9 +266,9 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies) { // Check: inplace, complex storage detail::expect_config(desc, @@ -275,10 +276,10 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwdget_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -295,20 +296,20 
@@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); diff --git a/src/dft/backends/mklcpu/commit.cpp b/src/dft/backends/mklcpu/commit.cpp index ca5ecaa87..a8dca834b 100644 --- a/src/dft/backends/mklcpu/commit.cpp +++ b/src/dft/backends/mklcpu/commit.cpp @@ -125,7 +125,7 @@ void commit_derived_impl::set_value_item(mklcpu_desc_t hand, enum DFT DFT_ERROR value_err = DftiSetValue(hand, name, args...); if (value_err != DFTI_NO_ERROR) { throw oneapi::math::exception("dft/backends/mklcpu", "set_value_item", - DftiErrorMessage(value_err)); + DftiErrorMessage(value_err)); } } @@ -169,17 +169,17 @@ void commit_derived_impl::set_value(mklcpu_desc_t* descHandle, // Setting the workspace causes an FFT_INVALID_DESCRIPTOR. if (config.workspace != config_value::ALLOW) { throw math::invalid_argument("dft/backends/mklcpu", "commit", - "MKLCPU only supports workspace set to allow"); + "MKLCPU only supports workspace set to allow"); } // Setting the ordering causes an FFT_INVALID_DESCRIPTOR. 
Check that default is used: if (config.ordering != dft::detail::config_value::ORDERED) { throw math::invalid_argument("dft/backends/mklcpu", "commit", - "MKLCPU only supports ordered ordering."); + "MKLCPU only supports ordered ordering."); } // Setting the transpose causes an FFT_INVALID_DESCRIPTOR. Check that default is used: if (config.transpose != false) { throw math::invalid_argument("dft/backends/mklcpu", "commit", - "MKLCPU only supports non-transposed."); + "MKLCPU only supports non-transposed."); } } } diff --git a/src/dft/backends/mklcpu/forward.cpp b/src/dft/backends/mklcpu/forward.cpp index 8c8b17f85..304bb6d01 100644 --- a/src/dft/backends/mklcpu/forward.cpp +++ b/src/dft/backends/mklcpu/forward.cpp @@ -40,26 +40,26 @@ namespace detail { // BUFFER version // Forward a MKLCPU DFT call to the backend, checking that the commit impl is valid. template -inline void check_fwd_commit(dft::descriptor &desc) { +inline void check_fwd_commit(dft::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) { throw math::invalid_argument("DFT", "computer_forward", - "DFT descriptor has not been commited for MKLCPU"); + "DFT descriptor has not been commited for MKLCPU"); } - auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); MKL_LONG commit_status{ DFTI_UNCOMMITTED }; DftiGetValue(mklcpu_desc[0], DFTI_COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { throw math::invalid_argument("DFT", "compute_forward", - "MKLCPU DFT descriptor was not successfully committed."); + "MKLCPU DFT descriptor was not successfully committed."); } } // Throw an math::invalid_argument if the runtime param in the descriptor does not match // the expected value. 
template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -69,26 +69,26 @@ inline auto expect_config(DescT &desc, const char *message) { // convert the base commit class to derived cpu commit class template -auto get_buffer(commit_t *commit_handle) { - commit_derived_t *derived_commit = - static_cast *>(commit_handle); +auto get_buffer(commit_t* commit_handle) { + commit_derived_t* derived_commit = + static_cast*>(commit_handle); return derived_commit->get_handle_buffer(); } } // namespace detail //In-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inout_acc = inout.template get_access(cgh); detail::host_task(cgh, [=]() { @@ -105,20 +105,20 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); 
detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto re_acc = inout_re.template get_access(cgh); auto im_acc = inout_im.template get_access(cgh); @@ -137,25 +137,26 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //Out-of-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); detail::host_task(cgh, [=]() { - auto in_ptr = const_cast *>(detail::acc_to_ptr(in_acc)); + auto in_ptr = const_cast*>(detail::acc_to_ptr(in_acc)); DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], in_ptr, detail::acc_to_ptr(out_acc)); if (status != DFTI_NO_ERROR) { @@ -169,22 +170,22 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + 
sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inre_acc = in_re.template get_access(cgh); auto inim_acc = in_im.template get_access(cgh); @@ -192,8 +193,8 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, auto outim_acc = out_im.template get_access(cgh); detail::host_task(cgh, [=]() { - auto inre_ptr = const_cast *>(detail::acc_to_ptr(inre_acc)); - auto inim_ptr = const_cast *>(detail::acc_to_ptr(inim_acc)); + auto inre_ptr = const_cast*>(detail::acc_to_ptr(inre_acc)); + auto inim_ptr = const_cast*>(detail::acc_to_ptr(inim_acc)); DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], inre_ptr, inim_ptr, detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc)); @@ -210,18 +211,18 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc 
= mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -238,20 +239,20 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -268,9 +269,9 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { // Check: inplace detail::expect_config(desc, @@ -278,11 +279,11 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwdget_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -299,22 +300,22 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - 
scalar *out_im, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); diff --git a/src/dft/backends/mklcpu/mklcpu_helpers.hpp b/src/dft/backends/mklcpu/mklcpu_helpers.hpp index e8f9ff951..c3dc4db4b 100644 --- a/src/dft/backends/mklcpu/mklcpu_helpers.hpp +++ b/src/dft/backends/mklcpu/mklcpu_helpers.hpp @@ -83,7 +83,7 @@ inline constexpr DFTI_CONFIG_PARAM to_mklcpu(dft::detail::config_param param) { case iparam::COMMIT_STATUS: return DFTI_COMMIT_STATUS; default: throw math::invalid_argument("dft", "MKLCPU descriptor set_value()", - "Invalid config param."); + "Invalid config param."); return static_cast(0); } } @@ -106,7 +106,7 @@ inline constexpr int to_mklcpu( } else { throw math::invalid_argument("dft", "MKLCPU descriptor set_value()", - "Invalid config value for complex storage."); + "Invalid config value for complex storage."); return 0; } } @@ -119,7 +119,7 @@ inline constexpr int to_mklcpu( } else { throw math::invalid_argument("dft", "MKLCPU descriptor set_value()", - "Invalid config value for real storage."); + "Invalid config value for real storage."); return 0; } } @@ -131,7 +131,7 @@ inline constexpr int to_mklcpu( } else { throw math::invalid_argument("dft", "MKLCPU descriptor set_value()", - "Invalid config value for inplace."); + "Invalid config value for inplace."); return 0; } } @@ 
-160,7 +160,7 @@ inline constexpr int to_mklcpu( } else { throw math::invalid_argument("dft", "MKLCPU descriptor set_value()", - "Invalid config value for packed format."); + "Invalid config value for packed format."); return 0; } } diff --git a/src/dft/backends/mklgpu/backward.cpp b/src/dft/backends/mklgpu/backward.cpp index f9d315d2f..c9a976ff0 100644 --- a/src/dft/backends/mklgpu/backward.cpp +++ b/src/dft/backends/mklgpu/backward.cpp @@ -45,33 +45,34 @@ namespace detail { /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid. /// Assumes backend descriptor values match those of the frontend. template -inline auto compute_backward(dft::detail::descriptor &desc, ArgTs &&... args) { +inline auto compute_backward(dft::detail::descriptor& desc, ArgTs&&... args) { using mklgpu_desc_t = oneapi::mkl::dft::descriptor; using desc_shptr_t = std::shared_ptr; using handle_t = std::pair; auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) { throw math::invalid_argument("DFT", "compute_backward", - "DFT descriptor has not been commited for MKLGPU"); + "DFT descriptor has not been commited for MKLGPU"); } - auto handle = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); auto mklgpu_desc = handle->second; // Second because backward DFT. int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { throw math::invalid_argument("DFT", "compute_backward", - "MKLGPU DFT descriptor was not successfully committed."); + "MKLGPU DFT descriptor was not successfully committed."); } // The MKLGPU backend's interface contains fewer function signatures than in this // open-source library. Consequently, it is not required to forward template arguments // to resolve to the correct function. 
- RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::dft::compute_backward(*mklgpu_desc, std::forward(args)...)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::dft::compute_backward(*mklgpu_desc, std::forward(args)...)); } /// Throw an math::invalid_argument if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -84,8 +85,8 @@ inline auto expect_config(DescT &desc, const char *message) { //In-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_backward(desc, inout); @@ -93,18 +94,19 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type & /*desc*/, - sycl::buffer, 1> & /*inout_re*/, - sycl::buffer, 1> & /*inout_im*/) { - throw math::unimplemented("DFT", "compute_backward", - "MKLGPU does not support compute_backward(desc, inout_re, inout_im)."); +ONEMATH_EXPORT void compute_backward(descriptor_type& /*desc*/, + sycl::buffer, 1>& /*inout_re*/, + sycl::buffer, 1>& /*inout_im*/) { + throw math::unimplemented( + "DFT", "compute_backward", + "MKLGPU does not support compute_backward(desc, inout_re, inout_im)."); } //Out-of-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for 
placement"); @@ -113,11 +115,11 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> & /*in_re*/, - sycl::buffer, 1> & /*in_im*/, - sycl::buffer, 1> & /*out_re*/, - sycl::buffer, 1> & /*out_im*/) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& /*in_re*/, + sycl::buffer, 1>& /*in_im*/, + sycl::buffer, 1>& /*out_re*/, + sycl::buffer, 1>& /*out_im*/) { detail::expect_config( desc, "Unexpected value for complex storage"); @@ -130,8 +132,8 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_backward(desc, inout, dependencies); @@ -139,10 +141,10 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type & /*desc*/, - scalar * /*inout_re*/, - scalar * /*inout_im*/, - const std::vector & /*dependencies*/) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& /*desc*/, + scalar* /*inout_re*/, + scalar* /*inout_im*/, + const std::vector& /*dependencies*/) { throw math::unimplemented( "DFT", "compute_backward", "MKLGPU does not support compute_backward(desc, inout_re, inout_im, dependencies)."); @@ -150,9 +152,9 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type & /*desc*/, //Out-of-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& 
desc, bwd* in, + fwd* out, + const std::vector& dependencies) { detail::expect_config(desc, "Unexpected value for placement"); @@ -161,12 +163,12 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, - scalar * /*in_re*/, - scalar * /*in_im*/, - scalar * /*out_re*/, - scalar * /*out_im*/, - const std::vector & /*dependencies*/) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, + scalar* /*in_re*/, + scalar* /*in_im*/, + scalar* /*out_re*/, + scalar* /*out_im*/, + const std::vector& /*dependencies*/) { detail::expect_config( desc, "Unexpected value for complex storage"); diff --git a/src/dft/backends/mklgpu/commit.cpp b/src/dft/backends/mklgpu/commit.cpp index 9742baded..e2550f828 100644 --- a/src/dft/backends/mklgpu/commit.cpp +++ b/src/dft/backends/mklgpu/commit.cpp @@ -79,14 +79,14 @@ class mklgpu_commit final : public dft::detail::commit_impl { public: mklgpu_commit(sycl::queue queue, const dft::detail::dft_values& config_values) : oneapi::math::dft::detail::commit_impl(queue, backend::mklgpu, - config_values), + config_values), handle(std::make_shared(config_values.dimensions), nullptr) { handle.second = handle.first; // Make sure the bwd pointer is valid. 
// MKLGPU does not throw an informative exception for the following: if constexpr (prec == dft::detail::precision::DOUBLE) { if (!queue.get_device().has(sycl::aspect::fp64)) { throw math::exception("dft/backends/mklgpu", "commit", - "Device does not support double precision."); + "Device does not support double precision."); } } } @@ -163,7 +163,7 @@ class mklgpu_commit final : public dft::detail::commit_impl { to_mklgpu(config.complex_storage)); if (config.real_storage != dft::detail::config_value::REAL_REAL) { throw math::invalid_argument("dft/backends/mklgpu", "commit", - "MKLGPU only supports real-real real storage."); + "MKLGPU only supports real-real real storage."); } desc.set_value(backend_param::CONJUGATE_EVEN_STORAGE, to_mklgpu(config.conj_even_storage)); @@ -173,7 +173,7 @@ class mklgpu_commit final : public dft::detail::commit_impl { if (stride_choice == dft::detail::stride_api::FB_STRIDES) { if (config.fwd_strides[0] != 0 || config.fwd_strides[0] != 0) { throw math::unimplemented("dft/backends/mklgpu", "commit", - "MKLGPU does not support nonzero offsets."); + "MKLGPU does not support nonzero offsets."); } desc.set_value(backend_param::FWD_STRIDES, config.fwd_strides.data()); desc.set_value(backend_param::BWD_STRIDES, config.bwd_strides.data()); @@ -181,7 +181,7 @@ class mklgpu_commit final : public dft::detail::commit_impl { else { if (config.input_strides[0] != 0 || config.output_strides[0] != 0) { throw math::unimplemented("dft/backends/mklgpu", "commit", - "MKLGPU does not support nonzero offsets."); + "MKLGPU does not support nonzero offsets."); } if (assume_fwd_dft) { desc.set_value(backend_param::FWD_STRIDES, config.input_strides.data()); @@ -203,12 +203,12 @@ class mklgpu_commit final : public dft::detail::commit_impl { // Setting the ordering causes an FFT_INVALID_DESCRIPTOR. 
Check that default is used: if (config.ordering != dft::detail::config_value::ORDERED) { throw math::invalid_argument("dft/backends/mklgpu", "commit", - "MKLGPU only supports ordered ordering."); + "MKLGPU only supports ordered ordering."); } // Setting the transpose causes an FFT_INVALID_DESCRIPTOR. Check that default is used: if (config.transpose != false) { throw math::invalid_argument("dft/backends/mklgpu", "commit", - "MKLGPU only supports non-transposed."); + "MKLGPU only supports non-transposed."); } } diff --git a/src/dft/backends/mklgpu/forward.cpp b/src/dft/backends/mklgpu/forward.cpp index 3ff8e9c9b..4f6522614 100644 --- a/src/dft/backends/mklgpu/forward.cpp +++ b/src/dft/backends/mklgpu/forward.cpp @@ -51,33 +51,34 @@ namespace detail { /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid. /// Assumes backend descriptor values match those of the frontend. template -inline auto compute_forward(dft::detail::descriptor &desc, ArgTs &&... args) { +inline auto compute_forward(dft::detail::descriptor& desc, ArgTs&&... args) { using mklgpu_desc_t = oneapi::mkl::dft::descriptor; using desc_shptr_t = std::shared_ptr; using handle_t = std::pair; auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) { throw math::invalid_argument("DFT", "compute_forward", - "DFT descriptor has not been commited for MKLGPU"); + "DFT descriptor has not been commited for MKLGPU"); } - auto handle = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); auto mklgpu_desc = handle->first; // First because forward DFT. 
int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(oneapi::mkl::dft::config_param::COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { throw math::invalid_argument("DFT", "compute_forward", - "MKLGPU DFT descriptor was not successfully committed."); + "MKLGPU DFT descriptor was not successfully committed."); } // The MKLGPU backend's interface contains fewer function signatures than in this // open-source library. Consequently, it is not required to forward template arguments // to resolve to the correct function. - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::dft::compute_forward(*mklgpu_desc, std::forward(args)...)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::dft::compute_forward(*mklgpu_desc, std::forward(args)...)); } /// Throw an math::invalid_argument if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -90,8 +91,8 @@ inline auto expect_config(DescT &desc, const char *message) { //In-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_forward(desc, inout); @@ -99,17 +100,18 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type & /*desc*/, - sycl::buffer, 1> & /*inout_re*/, - sycl::buffer, 1> & /*inout_im*/) { +ONEMATH_EXPORT void compute_forward(descriptor_type& /*desc*/, + sycl::buffer, 1>& /*inout_re*/, + sycl::buffer, 1>& /*inout_im*/) { throw math::unimplemented("DFT", 
"compute_forward", - "MKLGPU does not support compute_forward(desc, inout_re, inout_im)."); + "MKLGPU does not support compute_forward(desc, inout_re, inout_im)."); } //Out-of-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); @@ -118,11 +120,11 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> & /*in_re*/, - sycl::buffer, 1> & /*in_im*/, - sycl::buffer, 1> & /*out_re*/, - sycl::buffer, 1> & /*out_im*/) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& /*in_re*/, + sycl::buffer, 1>& /*in_im*/, + sycl::buffer, 1>& /*out_re*/, + sycl::buffer, 1>& /*out_im*/) { detail::expect_config( desc, "Unexpected value for complex storage"); @@ -135,8 +137,8 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_forward(desc, inout, dependencies); @@ -144,10 +146,10 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type & /*desc*/, - scalar * /*inout_re*/, - scalar * /*inout_im*/, - const std::vector & /*dependencies*/) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& /*desc*/, + scalar* /*inout_re*/, + scalar* /*inout_im*/, + const std::vector& /*dependencies*/) { throw math::unimplemented( "DFT", "compute_forward", "MKLGPU does not support 
compute_forward(desc, inout_re, inout_im, dependencies)."); @@ -155,9 +157,9 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type & /*desc*/, //Out-of-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { detail::expect_config(desc, "Unexpected value for placement"); @@ -166,12 +168,12 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, - scalar * /*in_re*/, - scalar * /*in_im*/, - scalar * /*out_re*/, - scalar * /*out_im*/, - const std::vector & /*dependencies*/) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, + scalar* /*in_re*/, + scalar* /*in_im*/, + scalar* /*out_re*/, + scalar* /*out_im*/, + const std::vector& /*dependencies*/) { detail::expect_config( desc, "Unexpected value for complex storage"); diff --git a/src/dft/backends/mklgpu/mklgpu_helpers.hpp b/src/dft/backends/mklgpu/mklgpu_helpers.hpp index 0687b26e7..0e440f5cf 100644 --- a/src/dft/backends/mklgpu/mklgpu_helpers.hpp +++ b/src/dft/backends/mklgpu/mklgpu_helpers.hpp @@ -79,7 +79,7 @@ inline constexpr oneapi::mkl::dft::config_param to_mklgpu(dft::detail::config_pa case iparam::COMMIT_STATUS: return oparam::COMMIT_STATUS; default: throw math::invalid_argument("dft", "MKLGPU descriptor set_value()", - "Invalid config param."); + "Invalid config param."); return static_cast(0); } } @@ -99,7 +99,7 @@ inline constexpr int to_mklgpu( } else { throw math::unimplemented("dft", "MKLGPU descriptor set_value()", - "MKLGPU only supports complex-complex for complex storage."); + "MKLGPU only supports complex-complex for complex storage."); return 0; } } @@ -112,7 +112,7 @@ inline constexpr int to_mklgpu( } else { throw math::invalid_argument("dft", "MKLGPU descriptor 
set_value()", - "Invalid config value for inplace."); + "Invalid config value for inplace."); return 0; } } @@ -141,7 +141,7 @@ inline constexpr int to_mklgpu( } else { throw math::invalid_argument("dft", "MKLGPU descriptor set_value()", - "Invalid config value for packed format."); + "Invalid config value for packed format."); return 0; } } @@ -151,7 +151,8 @@ inline constexpr int to_mklgpu( * @param value The config value to convert. **/ template -inline constexpr oneapi::mkl::dft::config_value to_mklgpu_config_value(dft::detail::config_value value); +inline constexpr oneapi::mkl::dft::config_value to_mklgpu_config_value( + dft::detail::config_value value); template <> inline constexpr oneapi::mkl::dft::config_value @@ -166,7 +167,7 @@ to_mklgpu_config_value( } else { throw math::invalid_argument("dft", "MKLGPU descriptor set_value()", - "Invalid config value for workspace placement."); + "Invalid config value for workspace placement."); return oneapi::mkl::dft::config_value::WORKSPACE_INTERNAL; } } diff --git a/src/dft/backends/portfft/commit.cpp b/src/dft/backends/portfft/commit.cpp index f7dbcbf83..628f93d02 100644 --- a/src/dft/backends/portfft/commit.cpp +++ b/src/dft/backends/portfft/commit.cpp @@ -62,7 +62,7 @@ class portfft_commit final : public dft::detail::commit_impl { public: portfft_commit(sycl::queue& queue, const dft::detail::dft_values& config_values) : oneapi::math::dft::detail::commit_impl(queue, backend::portfft, - config_values) { + config_values) { if constexpr (prec == dft::detail::precision::DOUBLE) { if (!queue.get_device().has(sycl::aspect::fp64)) { throw math::exception("DFT", "commit", "Device does not support double precision."); @@ -78,15 +78,15 @@ class portfft_commit final : public dft::detail::commit_impl { oneapi::math::dft::detail::config_value::WORKSPACE_EXTERNAL); if (config_values.workspace != config_value::ALLOW) { throw math::unimplemented("dft/backends/portfft", __FUNCTION__, - "portFFT only supports ALLOW for the WORKSPACE 
parameter"); + "portFFT only supports ALLOW for the WORKSPACE parameter"); } if (config_values.ordering != config_value::ORDERED) { throw math::unimplemented("dft/backends/portfft", __FUNCTION__, - "portFFT only supports ORDERED for the ORDERING parameter"); + "portFFT only supports ORDERED for the ORDERING parameter"); } if (config_values.transpose) { throw math::unimplemented("dft/backends/portfft", __FUNCTION__, - "portFFT does not supported transposed output"); + "portFFT does not supported transposed output"); } auto stride_api_choice = dft::detail::get_stride_api(config_values); @@ -182,14 +182,14 @@ class portfft_commit final : public dft::detail::commit_impl { dft::detail::get_commit(desc)->template compute_call_throw>( "compute_forward"); throw oneapi::math::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)", - "portFFT does not support real-real complex storage."); + "portFFT does not support real-real complex storage."); } sycl::event forward_ip_rr(descriptor_type& desc, scalar_type*, scalar_type*, const std::vector&) override { dft::detail::get_commit(desc)->template compute_call_throw("compute_forward"); throw oneapi::math::unimplemented("DFT", - "compute_forward(desc, inout_re, inout_im, dependencies)", - "portFFT does not support real-real complex storage."); + "compute_forward(desc, inout_re, inout_im, dependencies)", + "portFFT does not support real-real complex storage."); } // forward out-of-place COMPLEX_COMPLEX @@ -223,8 +223,8 @@ class portfft_commit final : public dft::detail::commit_impl { dft::detail::get_commit(desc)->template compute_call_throw>( "compute_forward"); throw oneapi::math::unimplemented("DFT", - "compute_forward(desc, in_re, in_im, out_re, out_im)", - "portFFT does not support real-real complex storage."); + "compute_forward(desc, in_re, in_im, out_re, out_im)", + "portFFT does not support real-real complex storage."); } sycl::event forward_op_rr(descriptor_type& desc, scalar_type*, scalar_type*, scalar_type*, 
scalar_type*, const std::vector&) override { @@ -263,15 +263,15 @@ class portfft_commit final : public dft::detail::commit_impl { dft::detail::get_commit(desc)->template compute_call_throw>( "compute_backward"); throw oneapi::math::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)", - "portFFT does not support real-real complex storage."); + "portFFT does not support real-real complex storage."); } sycl::event backward_ip_rr(descriptor_type& desc, scalar_type*, scalar_type*, const std::vector&) override { dft::detail::get_commit(desc)->template compute_call_throw( "compute_backward"); - throw oneapi::math::unimplemented("DFT", - "compute_backward(desc, inout_re, inout_im, dependencies)", - "portFFT does not support real-real complex storage."); + throw oneapi::math::unimplemented( + "DFT", "compute_backward(desc, inout_re, inout_im, dependencies)", + "portFFT does not support real-real complex storage."); } // backward out-of-place COMPLEX_COMPLEX @@ -305,8 +305,8 @@ class portfft_commit final : public dft::detail::commit_impl { dft::detail::get_commit(desc)->template compute_call_throw>( "compute_backward"); throw oneapi::math::unimplemented("DFT", - "compute_backward(desc, in_re, in_im, out_re, out_im)", - "portFFT does not support real-real complex storage."); + "compute_backward(desc, in_re, in_im, out_re, out_im)", + "portFFT does not support real-real complex storage."); } sycl::event backward_op_rr(descriptor_type& desc, scalar_type*, scalar_type*, scalar_type*, scalar_type*, const std::vector&) override { diff --git a/src/dft/backends/portfft/portfft_helper.hpp b/src/dft/backends/portfft/portfft_helper.hpp index c4270cd3c..9900d6feb 100644 --- a/src/dft/backends/portfft/portfft_helper.hpp +++ b/src/dft/backends/portfft/portfft_helper.hpp @@ -31,12 +31,12 @@ namespace pfft = portfft; namespace oneapi::math::dft::portfft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline 
dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::portfft) { throw math::invalid_argument("dft/backends/portfft", "get_commit", - "DFT descriptor has not been commited for portFFT"); + "DFT descriptor has not been commited for portFFT"); } return commit_handle; } @@ -53,9 +53,9 @@ using storage_type = detail::to_pfft_domain::type::value>>; template -auto get_descriptors(descriptor_type &desc) { +auto get_descriptors(descriptor_type& desc) { auto commit = detail::checked_get_commit(desc); - return reinterpret_cast *>(commit->get_handle()); + return reinterpret_cast*>(commit->get_handle()); } } // namespace oneapi::math::dft::portfft::detail diff --git a/src/dft/backends/rocfft/backward.cpp b/src/dft/backends/rocfft/backward.cpp index d0f47bd00..745ff666d 100644 --- a/src/dft/backends/rocfft/backward.cpp +++ b/src/dft/backends/rocfft/backward.cpp @@ -39,24 +39,24 @@ namespace oneapi::math::dft::rocfft { namespace detail { //forward declaration template -std::array get_offsets_bwd(dft::detail::commit_impl *commit); +std::array get_offsets_bwd(dft::detail::commit_impl* commit); template -rocfft_plan get_bwd_plan(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[1].plan.value(); +rocfft_plan get_bwd_plan(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[1].plan.value(); } template -rocfft_execution_info get_bwd_info(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[1].info.value(); +rocfft_execution_info get_bwd_info(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[1].info.value(); } } // namespace detail // BUFFER version //In-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + 
sycl::buffer, 1>& inout) { const std::string func_name = "compute_backward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -75,26 +75,26 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - auto inout_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, inout_acc)) + + auto inout_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, inout_acc)) + offsets[0]); - detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); + detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { const std::string func_name = "compute_backward(desc, inout_re, inout_im)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -108,7 +108,7 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_re_acc = inout_re.template get_access(cgh); auto inout_im_acc = inout_im.template get_access(cgh); 
commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -116,24 +116,24 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_re_acc)) + - offsets[0]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_im_acc)) + - offsets[0]) + std::array inout_native{ + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_re_acc)) + + offsets[0]), + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_im_acc)) + + offsets[0]) }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); } //Out-of-place transform template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -142,7 +142,7 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -151,31 +151,31 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, const std::string func_name = "compute_backward(desc, in, out)"; auto stream = detail::setup_stream(func_name, ih, info); - auto in_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_acc)) + + auto 
in_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, out_acc)) + + auto out_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_acc)) + offsets[1]); - detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); + detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); }); }); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMATH_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_bwd_plan(commit); auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_re_acc = in_re.template get_access(cgh); auto in_im_acc = in_im.template get_access(cgh); auto out_re_acc = out_re.template get_access(cgh); @@ -186,23 +186,24 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, const std::string func_name = "compute_backward(desc, in_re, in_im, out_re, out_im)"; auto stream = detail::setup_stream(func_name, ih, info); - std::array in_native{ - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_re_acc)) + + std::array in_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_re_acc)) + offsets[0]), - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_im_acc)) + + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_im_acc)) + offsets[0]) }; - 
std::array out_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_re_acc)) + - offsets[1]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_im_acc)) + - offsets[1]) + std::array out_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_re_acc)) + + offsets[1]), + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_im_acc)) + + offsets[1]) }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); } @@ -211,8 +212,8 @@ ONEMATH_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& deps) { const std::string func_name = "compute_backward(desc, inout, deps)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -232,15 +233,15 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - void *inout_ptr = inout; - detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); + void* inout_ptr = inout; + detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -249,9 +250,10 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, + scalar* inout_re, + scalar* inout_im, + const std::vector& deps) { const 
std::string func_name = "compute_backward(desc, inout_re, inout_im, deps)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -265,16 +267,15 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalardepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); - + std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -283,9 +284,9 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& deps) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -297,7 +298,7 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -305,9 +306,9 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwdset_last_usm_workspace_event_if_rqd(sycl_event); @@ -316,18 +317,18 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& deps) { auto commit = 
detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_bwd_plan(commit); auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - sycl::event sycl_event = queue.submit([&](sycl::handler &cgh) { + sycl::event sycl_event = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(deps); commit->depend_on_last_usm_workspace_event_if_rqd(cgh); @@ -336,9 +337,10 @@ ONEMATH_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar in_native{ in_re + offsets[0], in_im + offsets[0] }; - std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + std::array in_native{ in_re + offsets[0], in_im + offsets[0] }; + std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); diff --git a/src/dft/backends/rocfft/commit.cpp b/src/dft/backends/rocfft/commit.cpp index d650251e6..4c5d51d2f 100644 --- a/src/dft/backends/rocfft/commit.cpp +++ b/src/dft/backends/rocfft/commit.cpp @@ -94,7 +94,7 @@ class rocfft_commit final : public dft::detail::commit_impl { public: rocfft_commit(sycl::queue& queue, const dft::detail::dft_values& config_values) : oneapi::math::dft::detail::commit_impl(queue, backend::rocfft, - config_values) { + config_values) { if constexpr (prec == dft::detail::precision::DOUBLE) { if (!queue.get_device().has(sycl::aspect::fp64)) { throw math::exception("DFT", "commit", "Device does not support double precision."); @@ -108,14 +108,14 @@ class rocfft_commit final : public dft::detail::commit_impl { if (handles[0].plan) { if (rocfft_plan_destroy(handles[0].plan.value()) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to destroy forward plan."); + "Failed to destroy forward 
plan."); } handles[0].plan = std::nullopt; } if (handles[1].plan) { if (rocfft_plan_destroy(handles[1].plan.value()) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to destroy backward plan."); + "Failed to destroy backward plan."); } handles[1].plan = std::nullopt; } @@ -123,14 +123,14 @@ class rocfft_commit final : public dft::detail::commit_impl { if (handles[0].info) { if (rocfft_execution_info_destroy(handles[0].info.value()) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to destroy forward execution info ."); + "Failed to destroy forward execution info ."); } handles[0].info = std::nullopt; } if (handles[1].info) { if (rocfft_execution_info_destroy(handles[1].info.value()) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to destroy backward execution info ."); + "Failed to destroy backward execution info ."); } handles[1].info = std::nullopt; } @@ -280,17 +280,17 @@ class rocfft_commit final : public dft::detail::commit_impl { rocfft_plan_description plan_desc_fwd, plan_desc_bwd; // Can't reuse with ROCm 6 due to bug. 
if (rocfft_plan_description_create(&plan_desc_fwd) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create plan description."); + "Failed to create plan description."); } if (rocfft_plan_description_create(&plan_desc_bwd) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create plan description."); + "Failed to create plan description."); } // plan_description can be destroyed afted plan_create auto description_destroy = [](rocfft_plan_description p) { if (rocfft_plan_description_destroy(p) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to destroy plan description."); + "Failed to destroy plan description."); } }; std::unique_ptr @@ -359,13 +359,13 @@ class rocfft_commit final : public dft::detail::commit_impl { ); if (res != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to set forward data layout."); + "Failed to set forward data layout."); } if (rocfft_plan_description_set_scale_factor(plan_desc_fwd, config_values.fwd_scale) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to set forward scale factor."); + "Failed to set forward scale factor."); } rocfft_plan fwd_plan; @@ -374,7 +374,7 @@ class rocfft_commit final : public dft::detail::commit_impl { if (res != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create forward plan."); + "Failed to create forward plan."); } handles[0].plan = fwd_plan; @@ -382,7 +382,7 @@ class rocfft_commit final : public dft::detail::commit_impl { rocfft_execution_info fwd_info; if (rocfft_execution_info_create(&fwd_info) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create forward execution info."); + "Failed to create forward execution info."); } handles[0].info = fwd_info; @@ -392,7 
+392,7 @@ class rocfft_commit final : public dft::detail::commit_impl { void* work_buf; if (hipMalloc(&work_buf, work_buf_size) != hipSuccess) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to get allocate forward work buffer."); + "Failed to get allocate forward work buffer."); } set_workspace_impl(handles[0], reinterpret_cast(work_buf), work_buf_size, "commit"); @@ -415,13 +415,13 @@ class rocfft_commit final : public dft::detail::commit_impl { ); if (res != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to set backward data layout."); + "Failed to set backward data layout."); } if (rocfft_plan_description_set_scale_factor(plan_desc_bwd, config_values.bwd_scale) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to set backward scale factor."); + "Failed to set backward scale factor."); } rocfft_plan bwd_plan; @@ -429,14 +429,14 @@ class rocfft_commit final : public dft::detail::commit_impl { lengths.data(), number_of_transforms, plan_desc_bwd); if (res != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create backward rocFFT plan."); + "Failed to create backward rocFFT plan."); } handles[1].plan = bwd_plan; rocfft_execution_info bwd_info; if (rocfft_execution_info_create(&bwd_info) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to create backward execution info."); + "Failed to create backward execution info."); } handles[1].info = bwd_info; @@ -446,7 +446,7 @@ class rocfft_commit final : public dft::detail::commit_impl { void* work_buf; if (hipMalloc(&work_buf, work_buf_size) != hipSuccess) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to get allocate backward work buffer."); + "Failed to get allocate backward work buffer."); } set_workspace_impl(handles[1], reinterpret_cast(work_buf), work_buf_size, "commit"); @@ -489,7 
+489,7 @@ class rocfft_commit final : public dft::detail::commit_impl { std::size_t size = 0; if (rocfft_plan_get_work_buffer_size(*handle.plan, &size) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", function, - "Failed to get rocfft work buffer size."); + "Failed to get rocfft work buffer size."); } return static_cast(size); } @@ -530,7 +530,7 @@ class rocfft_commit final : public dft::detail::commit_impl { if (handle.buffer) { if (hipFree(*handle.buffer) != hipSuccess) { throw math::exception("dft/backends/rocfft", function, - "Failed to free internal buffer."); + "Failed to free internal buffer."); } handle.buffer = std::nullopt; } @@ -584,12 +584,12 @@ class rocfft_commit final : public dft::detail::commit_impl { // plan work buffer if (plan == nullptr) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Missing internal rocFFT plan."); + "Missing internal rocFFT plan."); } std::size_t work_buf_size; if (rocfft_plan_get_work_buffer_size(plan, &work_buf_size) != rocfft_status_success) { throw math::exception("dft/backends/rocfft", __FUNCTION__, - "Failed to get work buffer size."); + "Failed to get work buffer size."); } return static_cast(work_buf_size); } diff --git a/src/dft/backends/rocfft/execute_helper.hpp b/src/dft/backends/rocfft/execute_helper.hpp index b73bc3a32..c1ee6302b 100644 --- a/src/dft/backends/rocfft/execute_helper.hpp +++ b/src/dft/backends/rocfft/execute_helper.hpp @@ -37,12 +37,12 @@ namespace oneapi::math::dft::rocfft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::rocfft) { throw math::invalid_argument("dft/backends/rocfft", "get_commit", - "DFT descriptor has not been commited for rocFFT"); + "DFT descriptor has not been commited 
for rocFFT"); } return commit_handle; } @@ -50,7 +50,7 @@ inline dft::detail::commit_impl *checked_get_commit( /// Throw an math::invalid_argument if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -59,11 +59,11 @@ inline auto expect_config(DescT &desc, const char *message) { } template -inline void *native_mem(sycl::interop_handle &ih, Acc &buf) { +inline void* native_mem(sycl::interop_handle& ih, Acc& buf) { return ih.get_native_mem(buf); } -inline hipStream_t setup_stream(const std::string &func, sycl::interop_handle &ih, +inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& ih, rocfft_execution_info info) { auto stream = ih.get_native_queue(); auto result = rocfft_execution_info_set_stream(info, stream); @@ -75,16 +75,16 @@ inline hipStream_t setup_stream(const std::string &func, sycl::interop_handle &i return stream; } -inline void sync_checked(const std::string &func, hipStream_t stream) { - auto result = hipStreamSynchronize(stream); - if (result != hipSuccess) { - throw oneapi::math::exception("dft/backends/rocfft", func, - "hipStreamSynchronize returned " + std::to_string(result)); - } +inline void sync_checked(const std::string& func, hipStream_t stream) { + auto result = hipStreamSynchronize(stream); + if (result != hipSuccess) { + throw oneapi::math::exception("dft/backends/rocfft", func, + "hipStreamSynchronize returned " + std::to_string(result)); + } } -inline void execute_checked(const std::string &func, hipStream_t stream, const rocfft_plan plan, void *in_buffer[], - void *out_buffer[], rocfft_execution_info info) { +inline void execute_checked(const std::string& func, hipStream_t stream, const rocfft_plan plan, + void* in_buffer[], void* out_buffer[], rocfft_execution_info 
info) { auto result = rocfft_execute(plan, in_buffer, out_buffer, info); if (result != rocfft_status_success) { throw oneapi::math::exception("dft/backends/rocfft", func, diff --git a/src/dft/backends/rocfft/forward.cpp b/src/dft/backends/rocfft/forward.cpp index dfec5fbd2..40f8834a3 100644 --- a/src/dft/backends/rocfft/forward.cpp +++ b/src/dft/backends/rocfft/forward.cpp @@ -41,16 +41,16 @@ namespace oneapi::math::dft::rocfft { namespace detail { //forward declaration template -std::array get_offsets_fwd(dft::detail::commit_impl *commit); +std::array get_offsets_fwd(dft::detail::commit_impl* commit); template -rocfft_plan get_fwd_plan(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[0].plan.value(); +rocfft_plan get_fwd_plan(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[0].plan.value(); } template -rocfft_execution_info get_fwd_info(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[0].info.value(); +rocfft_execution_info get_fwd_info(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[0].info.value(); } } // namespace detail @@ -58,8 +58,8 @@ rocfft_execution_info get_fwd_info(dft::detail::commit_impl *commit) //In-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_forward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -78,26 +78,26 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); 
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - auto inout_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, inout_acc)) + + auto inout_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, inout_acc)) + offsets[0]); - detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); + detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { const std::string func_name = "compute_forward(desc, inout_re, inout_im)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -111,7 +111,7 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_re_acc = inout_re.template get_access(cgh); auto inout_im_acc = inout_im.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -119,23 +119,24 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_re_acc)) + - offsets[0]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_im_acc)) + - offsets[0]) + std::array inout_native{ + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_re_acc)) + + 
offsets[0]), + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_im_acc)) + + offsets[0]) }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); } //Out-of-place transform template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -144,7 +145,7 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -153,31 +154,31 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer( - reinterpret_cast *>(detail::native_mem(ih, in_acc)) + + auto in_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, out_acc)) + + auto out_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_acc)) + offsets[1]); - detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); + detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); }); }); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMATH_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMATH_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { auto commit = 
detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_fwd_plan(commit); auto info = detail::get_fwd_info(commit); auto offsets = detail::get_offsets_fwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_re_acc = in_re.template get_access(cgh); auto in_im_acc = in_im.template get_access(cgh); auto out_re_acc = out_re.template get_access(cgh); @@ -188,23 +189,24 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, const std::string func_name = "compute_forward(desc, in_re, in_im, out_re, out_im)"; auto stream = detail::setup_stream(func_name, ih, info); - std::array in_native{ - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_re_acc)) + + std::array in_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_re_acc)) + offsets[0]), - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_im_acc)) + + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_im_acc)) + offsets[0]) }; - std::array out_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_re_acc)) + - offsets[1]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_im_acc)) + - offsets[1]) + std::array out_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_re_acc)) + + offsets[1]), + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_im_acc)) + + offsets[1]) }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); } @@ -213,8 +215,8 @@ ONEMATH_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& deps) { 
const std::string func_name = "compute_forward(desc, inout, deps)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -234,15 +236,15 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - void *inout_ptr = inout; - detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); + void* inout_ptr = inout; + detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -251,9 +253,9 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& deps) { const std::string func_name = "compute_forward(desc, inout_re, inout_im, deps)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -267,14 +269,14 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalardepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -283,9 +285,9 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar -ONEMATH_EXPORT sycl::event 
compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& deps) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -297,7 +299,7 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -305,9 +307,9 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwdset_last_usm_workspace_event_if_rqd(sycl_event); @@ -316,18 +318,18 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &deps) { +ONEMATH_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& deps) { auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_fwd_plan(commit); auto info = detail::get_fwd_info(commit); auto offsets = detail::get_offsets_fwd(commit); - sycl::event sycl_event = queue.submit([&](sycl::handler &cgh) { + sycl::event sycl_event = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(deps); commit->depend_on_last_usm_workspace_event_if_rqd(cgh); @@ -336,9 +338,10 @@ ONEMATH_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar in_native{ in_re + offsets[0], in_im + offsets[0] }; - std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + std::array in_native{ in_re + offsets[0], in_im + offsets[0] }; + std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); 
commit->set_last_usm_workspace_event_if_rqd(sycl_event); diff --git a/src/dft/backends/stride_helper.hpp b/src/dft/backends/stride_helper.hpp index e00d0528e..8375b472e 100644 --- a/src/dft/backends/stride_helper.hpp +++ b/src/dft/backends/stride_helper.hpp @@ -76,7 +76,7 @@ struct stride_vectors { bwd_out(stride_choice == stride_api::FB_STRIDES ? vec_a : vec_b) { if (stride_choice == stride_api::INVALID) { throw math::exception("DFT", "detail::stride_vector constructor", - "Internal error: invalid stride API"); + "Internal error: invalid stride API"); } auto& v1 = stride_choice == stride_api::FB_STRIDES ? config_values.fwd_strides : config_values.input_strides; @@ -89,7 +89,7 @@ struct stride_vectors { if constexpr (std::is_unsigned_v) { if (v1[i] < 0 || v2[i] < 0) { throw math::unimplemented("DFT", "commit", - "Backend does not support negative strides."); + "Backend does not support negative strides."); } } vec_a[i] = static_cast(v1[i]); diff --git a/src/dft/descriptor.cxx b/src/dft/descriptor.cxx index df90d9f0e..57ed1f086 100644 --- a/src/dft/descriptor.cxx +++ b/src/dft/descriptor.cxx @@ -157,7 +157,7 @@ descriptor::descriptor(std::vector dimensions) { for (const auto& dim : dimensions) { if (dim <= 0) { throw math::invalid_argument("DFT", "descriptor", - "Invalid dimension value (negative or 0)."); + "Invalid dimension value (negative or 0)."); } } compute_default_strides(dimensions, values_.fwd_strides, values_.bwd_strides); @@ -283,7 +283,7 @@ void descriptor::set_workspace(scalar_type* usm_workspace) { } else { throw math::uninitialized("DFT", "set_workspace", - "Can only set workspace on committed descriptor."); + "Can only set workspace on committed descriptor."); } } @@ -294,7 +294,7 @@ void descriptor::set_workspace(sycl::buffer& buffer_work } else { throw math::uninitialized("DFT", "set_workspace", - "Can only set workspace on committed descriptor."); + "Can only set workspace on committed descriptor."); } } diff --git 
a/src/dft/descriptor_config_helper.hpp b/src/dft/descriptor_config_helper.hpp index c4d319096..00e3c8c79 100644 --- a/src/dft/descriptor_config_helper.hpp +++ b/src/dft/descriptor_config_helper.hpp @@ -118,7 +118,7 @@ void set_value(dft_values& vals, for (std::size_t i{ 0 }; i < vals.dimensions.size(); ++i) { if (set_val[i] <= 0) { throw math::invalid_argument("DFT", "set_value", - "Invalid length value (negative or 0)."); + "Invalid length value (negative or 0)."); } } std::copy(set_val, set_val + vals.dimensions.size(), vals.dimensions.begin()); @@ -135,7 +135,7 @@ void set_value(dft_values& vals, else if constexpr (Param == config_param::NUMBER_OF_TRANSFORMS) { if (set_val <= 0) { throw math::invalid_argument("DFT", "set_value", - "Number of transforms must be positive."); + "Number of transforms must be positive."); } vals.number_of_transforms = set_val; } @@ -145,7 +145,7 @@ void set_value(dft_values& vals, } else { throw math::invalid_argument("DFT", "set_value", - "Complex storage must be complex_complex or real_real."); + "Complex storage must be complex_complex or real_real."); } } else if constexpr (Param == config_param::REAL_STORAGE) { @@ -162,7 +162,7 @@ void set_value(dft_values& vals, } else { throw math::invalid_argument("DFT", "set_value", - "Conjugate even storage must be complex_complex."); + "Conjugate even storage must be complex_complex."); } } else if constexpr (Param == config_param::PLACEMENT) { @@ -171,7 +171,7 @@ void set_value(dft_values& vals, } else { throw math::invalid_argument("DFT", "set_value", - "Placement must be inplace or not inplace."); + "Placement must be inplace or not inplace."); } } #pragma clang diagnostic push @@ -224,7 +224,7 @@ void set_value(dft_values& vals, } else { throw math::invalid_argument("DFT", "set_value", - "Ordering must be ordered or backwards scrambled."); + "Ordering must be ordered or backwards scrambled."); } } else if constexpr (Param == config_param::TRANSPOSE) { diff --git 
a/src/dft/execute_helper_generic.hpp b/src/dft/execute_helper_generic.hpp index ba4cc709a..4f5ae6727 100644 --- a/src/dft/execute_helper_generic.hpp +++ b/src/dft/execute_helper_generic.hpp @@ -40,9 +40,9 @@ namespace oneapi::math::dft::detail { template static inline void fft_enqueue_task(HandlerT&& cgh, FnT&& f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else - cgh.host_task([=](sycl::interop_handle ih){ + cgh.host_task([=](sycl::interop_handle ih) { #endif f(std::move(ih)); }); diff --git a/src/dft/function_table.hpp b/src/dft/function_table.hpp index 244a06808..d1c647eb9 100644 --- a/src/dft/function_table.hpp +++ b/src/dft/function_table.hpp @@ -36,26 +36,28 @@ typedef struct { int version; oneapi::math::dft::detail::commit_impl* ( + oneapi::math::dft::domain::COMPLEX>* ( *create_commit_sycl_fz)( const oneapi::math::dft::descriptor& desc, + oneapi::math::dft::domain::COMPLEX>& desc, sycl::queue& sycl_queue); oneapi::math::dft::detail::commit_impl* ( + oneapi::math::dft::domain::COMPLEX>* ( *create_commit_sycl_dz)( const oneapi::math::dft::descriptor& desc, + oneapi::math::dft::domain::COMPLEX>& desc, sycl::queue& sycl_queue); oneapi::math::dft::detail::commit_impl* (*create_commit_sycl_fr)( + oneapi::math::dft::domain::REAL>* ( + *create_commit_sycl_fr)( const oneapi::math::dft::descriptor& desc, + oneapi::math::dft::domain::REAL>& desc, sycl::queue& sycl_queue); oneapi::math::dft::detail::commit_impl* (*create_commit_sycl_dr)( + oneapi::math::dft::domain::REAL>* ( + *create_commit_sycl_dr)( const oneapi::math::dft::descriptor& desc, + oneapi::math::dft::domain::REAL>& desc, sycl::queue& sycl_queue); } dft_function_table_t; diff --git a/src/include/allocator_helper.hpp b/src/include/allocator_helper.hpp index 9dfd6e36e..433e0468e 100644 --- a/src/include/allocator_helper.hpp +++ 
b/src/include/allocator_helper.hpp @@ -29,7 +29,7 @@ namespace oneapi { namespace math { -static inline void *aligned_alloc(size_t align, size_t size) { +static inline void* aligned_alloc(size_t align, size_t size) { #ifdef _WIN64 return ::_aligned_malloc(size, align); #else @@ -37,7 +37,7 @@ static inline void *aligned_alloc(size_t align, size_t size) { #endif } -static inline void aligned_free(void *p) { +static inline void aligned_free(void* p) { #ifdef _WIN64 ::_aligned_free(p); #else diff --git a/src/include/common_onemkl_conversion.hpp b/src/include/common_onemkl_conversion.hpp index a54669e71..47d9bb0a4 100644 --- a/src/include/common_onemkl_conversion.hpp +++ b/src/include/common_onemkl_conversion.hpp @@ -33,73 +33,120 @@ namespace oneapi { namespace math { namespace detail { -inline auto get_onemkl_transpose(oneapi::math::transpose* param_ptr) { return reinterpret_cast(param_ptr); } -inline auto get_onemkl_transpose(oneapi::math::transpose param) { return *get_onemkl_transpose(¶m); } - -inline auto get_onemkl_uplo(oneapi::math::uplo* param_ptr) { return reinterpret_cast(param_ptr); } -inline auto get_onemkl_uplo(oneapi::math::uplo param) { return *get_onemkl_uplo(¶m); } - -inline auto get_onemkl_diag(oneapi::math::diag* param_ptr) { return reinterpret_cast(param_ptr); } -inline auto get_onemkl_diag(oneapi::math::diag param) { return *get_onemkl_diag(¶m); } - -inline auto get_onemkl_side(oneapi::math::side* param_ptr) { return reinterpret_cast(param_ptr); } -inline auto get_onemkl_side(oneapi::math::side param) { return *get_onemkl_side(¶m); } - -inline auto get_onemkl_offset(oneapi::math::offset param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_layout(oneapi::math::layout param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_index_base(oneapi::math::index_base param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_job(oneapi::math::job param) { return *reinterpret_cast(¶m); } - -inline auto 
get_onemkl_jobsvd(oneapi::math::jobsvd param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_generate(oneapi::math::generate param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_compz(oneapi::math::compz param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_direct(oneapi::math::direct param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_storev(oneapi::math::storev param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_rangev(oneapi::math::rangev param) { return *reinterpret_cast(¶m); } - -inline auto get_onemkl_order(oneapi::math::order param) { return *reinterpret_cast(¶m); } +inline auto get_onemkl_transpose(oneapi::math::transpose* param_ptr) { + return reinterpret_cast(param_ptr); +} +inline auto get_onemkl_transpose(oneapi::math::transpose param) { + return *get_onemkl_transpose(¶m); +} + +inline auto get_onemkl_uplo(oneapi::math::uplo* param_ptr) { + return reinterpret_cast(param_ptr); +} +inline auto get_onemkl_uplo(oneapi::math::uplo param) { + return *get_onemkl_uplo(¶m); +} + +inline auto get_onemkl_diag(oneapi::math::diag* param_ptr) { + return reinterpret_cast(param_ptr); +} +inline auto get_onemkl_diag(oneapi::math::diag param) { + return *get_onemkl_diag(¶m); +} + +inline auto get_onemkl_side(oneapi::math::side* param_ptr) { + return reinterpret_cast(param_ptr); +} +inline auto get_onemkl_side(oneapi::math::side param) { + return *get_onemkl_side(¶m); +} + +inline auto get_onemkl_offset(oneapi::math::offset param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_layout(oneapi::math::layout param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_index_base(oneapi::math::index_base param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_job(oneapi::math::job param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_jobsvd(oneapi::math::jobsvd param) { + return *reinterpret_cast(¶m); +} + +inline auto 
get_onemkl_generate(oneapi::math::generate param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_compz(oneapi::math::compz param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_direct(oneapi::math::direct param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_storev(oneapi::math::storev param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_rangev(oneapi::math::rangev param) { + return *reinterpret_cast(¶m); +} + +inline auto get_onemkl_order(oneapi::math::order param) { + return *reinterpret_cast(¶m); +} // Rethrow Intel(R) oneMKL exceptions as oneMath exceptions -#define RETHROW_ONEMKL_EXCEPTIONS(EXPRESSION) \ -do { \ - try { \ - EXPRESSION; \ - } catch(const oneapi::mkl::unsupported_device& e) { \ - throw unsupported_device(e.what()); \ - } catch(const oneapi::mkl::host_bad_alloc& e) { \ - throw host_bad_alloc(e.what()); \ - } catch(const oneapi::mkl::device_bad_alloc& e) { \ - throw device_bad_alloc(e.what()); \ - } catch(const oneapi::mkl::unimplemented& e) { \ - throw unimplemented(e.what()); \ - } catch(const oneapi::mkl::invalid_argument& e) { \ - throw invalid_argument(e.what()); \ - } catch(const oneapi::mkl::uninitialized& e) { \ - throw uninitialized(e.what()); \ - } catch(const oneapi::mkl::computation_error& e) { \ - throw computation_error(e.what()); \ - } catch(const oneapi::mkl::batch_error& e) { \ - throw batch_error(e.what()); \ - } catch(const oneapi::mkl::exception& e) { \ - throw exception(e.what()); \ - } \ -} while (0) - -#define RETHROW_ONEMKL_EXCEPTIONS_RET(EXPRESSION) \ -do { \ -RETHROW_ONEMKL_EXCEPTIONS(return EXPRESSION); \ -} while(0) - -} // namespace detail -} // namespace math -} // namespace oneapi +#define RETHROW_ONEMKL_EXCEPTIONS(EXPRESSION) \ + do { \ + try { \ + EXPRESSION; \ + } \ + catch (const oneapi::mkl::unsupported_device& e) { \ + throw unsupported_device(e.what()); \ + } \ + catch (const oneapi::mkl::host_bad_alloc& e) { \ + throw host_bad_alloc(e.what()); \ 
+ } \ + catch (const oneapi::mkl::device_bad_alloc& e) { \ + throw device_bad_alloc(e.what()); \ + } \ + catch (const oneapi::mkl::unimplemented& e) { \ + throw unimplemented(e.what()); \ + } \ + catch (const oneapi::mkl::invalid_argument& e) { \ + throw invalid_argument(e.what()); \ + } \ + catch (const oneapi::mkl::uninitialized& e) { \ + throw uninitialized(e.what()); \ + } \ + catch (const oneapi::mkl::computation_error& e) { \ + throw computation_error(e.what()); \ + } \ + catch (const oneapi::mkl::batch_error& e) { \ + throw batch_error(e.what()); \ + } \ + catch (const oneapi::mkl::exception& e) { \ + throw exception(e.what()); \ + } \ + } while (0) + +#define RETHROW_ONEMKL_EXCEPTIONS_RET(EXPRESSION) \ + do { \ + RETHROW_ONEMKL_EXCEPTIONS(return EXPRESSION); \ + } while (0) + +} // namespace detail +} // namespace math +} // namespace oneapi #endif // _ONEMATH_SRC_INCLUDE_COMMON_ONEMKL_TYPES_CONVERSION_HPP_ diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index c32cb691d..3e1f0824b 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -30,7 +30,7 @@ #ifdef __linux__ #include -#define LIB_TYPE void * +#define LIB_TYPE void* #define GET_LIB_HANDLE(libname) dlopen((libname), RTLD_LAZY | RTLD_GLOBAL) #define GET_FUNC(lib, fn) dlsym(lib, (fn)) #define FREE_LIB_HANDLE(libname) dlclose(libname) @@ -59,7 +59,7 @@ class table_initializer { using dlhandle = std::unique_ptr; public: - function_table_t &operator[](std::pair device_queue_pair) { + function_table_t& operator[](std::pair device_queue_pair) { auto lib = tables.find(device_queue_pair.first); if (lib != tables.end()) return lib->second; @@ -96,10 +96,10 @@ class table_initializer { } #endif - function_table_t &add_table(oneapi::math::device key, sycl::queue &q) { + function_table_t& add_table(oneapi::math::device key, sycl::queue& q) { dlhandle handle; // check all available libraries for the key(device) 
- for (const char *libname : libraries[domain_id][key]) { + for (const char* libname : libraries[domain_id][key]) { handle = dlhandle{ ::GET_LIB_HANDLE(libname) }; if (handle) break; @@ -114,7 +114,7 @@ class table_initializer { } } auto t = - reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); + reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); if (!t) { std::cerr << ERROR_MSG << '\n'; diff --git a/src/lapack/backends/cusolver/cusolver_batch.cpp b/src/lapack/backends/cusolver/cusolver_batch.cpp index f2c990963..3ad137912 100644 --- a/src/lapack/backends/cusolver/cusolver_batch.cpp +++ b/src/lapack/backends/cusolver/cusolver_batch.cpp @@ -31,24 +31,24 @@ namespace cusolver { // BATCH BUFFER API template -inline void geqrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = 
sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -62,10 +62,10 @@ inline void geqrf_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GEQRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, \ + void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, \ std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -78,10 +78,10 @@ GEQRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgeqrf) #undef GEQRF_STRIDED_BATCH_LAUNCHER template -inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void getri_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -91,7 +91,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::buffer ipiv32(sycl::range<1>{ ipiv32_size }); sycl::buffer devInfo{ batch_size }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) 
{ auto ipiv_acc = sycl::accessor{ ipiv, cgh, sycl::read_only }; auto ipiv32_acc = sycl::accessor{ ipiv32, cgh, sycl::write_only }; cgh.parallel_for(sycl::range<1>{ ipiv32_size }, [=](sycl::id<1> index) { @@ -102,7 +102,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st // getri_batched is contained within cublas, not cusolver. For this reason // we need to use cublas types instead of cusolver types (as is needed for // other lapack routines) - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { using blas::cublas::cublas_error; sycl::accessor a_acc{ a, cgh, sycl::read_only }; @@ -110,7 +110,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::accessor ipiv32_acc{ ipiv32, cgh }; sycl::accessor devInfo_acc{ devInfo, cgh, sycl::write_only }; - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { cublasStatus_t err; CUresult cuda_result; cublasHandle_t cublas_handle; @@ -118,27 +118,28 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st CUstream cu_stream = sycl::get_native(queue); CUBLAS_ERROR_FUNC(cublasSetStream, err, cublas_handle, cu_stream); - auto a_ = sc.get_mem(a_acc); - auto scratch_ = sc.get_mem(scratch_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto info_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto scratch_ = sc.get_mem(scratch_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto info_ = sc.get_mem(devInfo_acc); CUdeviceptr a_dev; - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, 
batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + auto** a_dev_ = reinterpret_cast(a_dev); CUdeviceptr scratch_dev; - cuDataType **scratch_batched = + cuDataType** scratch_batched = create_ptr_list_from_stride(scratch_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T*) * batch_size); CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, scratch_dev, scratch_batched, - sizeof(T *) * batch_size); - auto **scratch_dev_ = reinterpret_cast(scratch_dev); + sizeof(T*) * batch_size); + auto** scratch_dev_ = reinterpret_cast(scratch_dev); - blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, lda, ipiv32_, - scratch_dev_, lda, info_, batch_size); + blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, + lda, ipiv32_, scratch_dev_, lda, info_, + batch_size); free(a_batched); free(scratch_batched); @@ -148,7 +149,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st }); // The inverted matrices stored in scratch_ need to be stored in a_ - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { sycl::accessor a_acc{ a, cgh, sycl::write_only }; sycl::accessor scratch_acc{ scratchpad, cgh, sycl::read_only }; cgh.parallel_for(sycl::range<1>{ static_cast( @@ -156,7 +157,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st [=](sycl::id<1> index) { a_acc[index] = scratch_acc[index]; }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { sycl::accessor ipiv32_acc{ ipiv32, cgh, sycl::read_only }; sycl::accessor ipiv_acc{ ipiv, cgh, sycl::write_only }; cgh.parallel_for(sycl::range<1>{ static_cast(ipiv32_size) }, @@ -168,10 +169,10 @@ inline void 
getri_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GETRI_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &ipiv, \ + void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& ipiv, \ std::int64_t stride_ipiv, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getri_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, n, a, lda, stride_a, ipiv, \ stride_ipiv, batch_size, scratchpad, scratchpad_size); \ } @@ -184,12 +185,12 @@ GETRI_STRIDED_BATCH_LAUNCHER(std::complex, cublasZgetriBatched) #undef GETRI_STRIDED_BATCH_LAUNCHER template -inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline void getrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -201,7 +202,7 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = stride_ipiv * batch_size; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template 
get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -209,16 +210,16 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end @@ -235,12 +236,12 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &ipiv, \ - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, \ + void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& ipiv, \ + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, \ std::int64_t stride_b, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, \ scratchpad_size); \ @@ -254,10 +255,10 @@ 
GETRS_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgetrs) #undef GETRS_STRIDED_BATCH_LAUNCHER template -inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void getrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -270,17 +271,17 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ batch_size }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -292,7 +293,7 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st }); // Copy from 32-bit USM to 64-bit - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, @@ -303,10 +304,10 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GETRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, \ + void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, \ std::int64_t stride_ipiv, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); \ } @@ -319,25 +320,25 @@ GETRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgetrf) #undef GETRF_STRIDED_BATCH_LAUNCHER template -inline void orgqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void orgqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = 
tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -351,10 +352,10 @@ inline void orgqr_batch(const char *func_name, Func func, sycl::queue &queue, st } #define ORGQR_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -365,30 +366,30 @@ ORGQR_STRIDED_BATCH_LAUNCHER(double, cusolverDnDorgqr) #undef ORGQR_STRIDED_BATCH_LAUNCHER template -inline void potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, +inline void potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev; CUresult cuda_result; cusolverStatus_t err; - auto a_ = sc.get_mem(a_acc); + auto a_ = sc.get_mem(a_acc); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto** a_dev_ = reinterpret_cast(a_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, a_dev_, (int)lda, nullptr, (int)batch_size); @@ -401,9 +402,9 @@ inline void potrf_batch(const char *func_name, Func func, sycl::queue &queue, // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t batch_size, sycl::buffer &scratchpad, \ + void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t batch_size, 
sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, stride_a, \ batch_size, scratchpad, scratchpad_size); \ @@ -417,11 +418,11 @@ POTRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZpotrfBatched) #undef POTRF_STRIDED_BATCH_LAUNCHER template -inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline void potrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -431,28 +432,28 @@ inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, if (nrhs != 1) throw unimplemented("lapack", "potrs_batch", "cusolver potrs_batch only supports nrhs = 1"); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev, b_dev; cusolverStatus_t err; CUresult cuda_result; - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - cuDataType **b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, 
&a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + cuDataType** b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); - auto **b_dev_ = reinterpret_cast(b_dev); + auto** a_dev_ = reinterpret_cast(a_dev); + auto** b_dev_ = reinterpret_cast(b_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, (int)nrhs, a_dev_, (int)lda, b_dev_, ldb, nullptr, @@ -468,11 +469,11 @@ inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, \ + void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, \ std::int64_t stride_b, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, 
nrhs, a, lda, \ stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); \ } @@ -485,25 +486,25 @@ POTRS_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZpotrsBatched) #undef POTRS_STRIDED_BATCH_LAUNCHER template -inline void ungqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void ungqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -517,10 +518,10 @@ inline void ungqr_batch(const char *func_name, Func func, sycl::queue &queue, st } #define UNGQR_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - 
sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -533,22 +534,22 @@ UNGQR_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZungqr) // BATCH USM API template -inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, - T *tau, std::int64_t stride_tau, std::int64_t batch_size, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t stride_a, + T* tau, std::int64_t stride_tau, std::int64_t batch_size, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = 
reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -564,11 +565,11 @@ inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GEQRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -582,24 +583,24 @@ GEQRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, T **tau, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, T** a, std::int64_t* lda, T** tau, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { 
cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -620,9 +621,9 @@ inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &qu #define GEQRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event geqrf_batch( \ - sycl::queue &queue, std::int64_t *m, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - TYPE **tau, std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, std::int64_t* m, std::int64_t* n, TYPE** a, std::int64_t* lda, \ + TYPE** tau, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -635,11 +636,11 @@ GEQRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_BATCH_LAUNCHER_USM template -inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, 
+ std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_ipiv, batch_size, scratchpad_size); @@ -648,17 +649,17 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = stride_ipiv * batch_size; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int) * batch_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); + int* devInfo = (int*)malloc_device(sizeof(int) * batch_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratchpad_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratchpad_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -670,14 +671,14 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu }); // Copy from 32-bit USM to 64-bit - sycl::event done_casting = queue.submit([&](sycl::handler &cgh) { + sycl::event done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = ipiv32[index]; }); }); // Enqueue free memory, don't return event as not-neccessary for user to 
wait for ipiv32 being released - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done_casting); cgh.host_task([=](sycl::interop_handle ih) { sycl::free(ipiv32, queue); }); }); @@ -690,11 +691,11 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, \ - std::int64_t stride_ipiv, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, \ + std::int64_t stride_ipiv, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -708,12 +709,12 @@ GETRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -726,19 +727,19 @@ inline sycl::event getrf_batch(const char 
*func_name, Func func, sycl::queue &qu // cuSolver legacy api does not accept 64-bit ints. // To get around the limitation. // Allocate memory with 32-bit ints then copy over results - int **ipiv32 = (int **)malloc(sizeof(int *) * batch_size); + int** ipiv32 = (int**)malloc(sizeof(int*) * batch_size); int64_t global_id = 0; for (int64_t group_id = 0; group_id < group_count; ++group_id) for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) - ipiv32[global_id] = (int *)malloc_device(sizeof(int) * n[group_id], queue); - int *devInfo = (int *)malloc_device(sizeof(int) * batch_size, queue); + ipiv32[global_id] = (int*)malloc_device(sizeof(int) * n[group_id], queue); + int* devInfo = (int*)malloc_device(sizeof(int) * batch_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -759,10 +760,10 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu for (int64_t group_id = 0, global_id = 0; group_id < group_count; ++group_id) { uint64_t ipiv_size = n[group_id]; for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) { - int64_t *d_ipiv = ipiv[global_id]; - int *d_ipiv32 = ipiv32[global_id]; + int64_t* d_ipiv = ipiv[global_id]; + int* d_ipiv32 = ipiv32[global_id]; - sycl::event e = queue.submit([&](sycl::handler &cgh) { + sycl::event e = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { d_ipiv[index] = 
d_ipiv32[index]; }); @@ -772,7 +773,7 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } // Enqueue free memory - sycl::event done_freeing = queue.submit([&](sycl::handler &cgh) { + sycl::event done_freeing = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(casting_dependencies); cgh.host_task([=](sycl::interop_handle ih) { for (int64_t global_id = 0; global_id < batch_size; ++global_id) @@ -789,11 +790,11 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, TYPE **a, \ - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, TYPE** a, \ + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -806,20 +807,20 @@ GETRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRS_BATCH_LAUNCHER_USM template -sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t n, T *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, T *scratchpad, +sycl::event getri_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t n, T* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = 
typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, stride_ipiv, batch_size, scratchpad_size); std::uint64_t ipiv32_size = n * batch_size; - int *ipiv32 = sycl::malloc_device(ipiv32_size, queue); - int *devInfo = sycl::malloc_device(batch_size, queue); + int* ipiv32 = sycl::malloc_device(ipiv32_size, queue); + int* devInfo = sycl::malloc_device(batch_size, queue); - sycl::event done_casting = queue.submit([&](sycl::handler &cgh) { + sycl::event done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>{ static_cast(ipiv32_size) }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[(index / n) * stride_ipiv + index % n]); @@ -829,13 +830,13 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st // getri_batched is contained within cublas, not cusolver. For this reason // we need to use cublas types instead of cusolver types (as is needed for // other lapack routines) - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { using blas::cublas::cublas_error; cgh.depends_on(done_casting); cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { cublasStatus_t err; CUresult cuda_result; cublasHandle_t cublas_handle; @@ -844,23 +845,24 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st CUBLAS_ERROR_FUNC(cublasSetStream, err, cublas_handle, cu_stream); CUdeviceptr a_dev; - auto *a_ = reinterpret_cast(a); - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto* a_ = reinterpret_cast(a); + cuDataType** a_batched = 
create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + auto** a_dev_ = reinterpret_cast(a_dev); CUdeviceptr scratch_dev; - auto *scratch_ = reinterpret_cast(scratchpad); - cuDataType **scratch_batched = + auto* scratch_ = reinterpret_cast(scratchpad); + cuDataType** scratch_batched = create_ptr_list_from_stride(scratch_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T*) * batch_size); CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, scratch_dev, scratch_batched, - sizeof(T *) * batch_size); - auto **scratch_dev_ = reinterpret_cast(scratch_dev); + sizeof(T*) * batch_size); + auto** scratch_dev_ = reinterpret_cast(scratch_dev); - blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, lda, ipiv32, - scratch_dev_, lda, devInfo, batch_size); + blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, + lda, ipiv32, scratch_dev_, lda, devInfo, + batch_size); free(a_batched); free(scratch_batched); @@ -870,14 +872,14 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st }); // The inverted matrices stored in scratch_ need to be stored in a_ - auto copy1 = queue.submit([&](sycl::handler &cgh) { + auto copy1 = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for( sycl::range<1>{ static_cast(stride_a * (batch_size - 1) + lda * n) }, [=](sycl::id<1> index) { a[index] = scratchpad[index]; }); }); - auto copy2 = queue.submit([&](sycl::handler &cgh) { + auto copy2 = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for( sycl::range<1>{ static_cast(ipiv32_size) }, [=](sycl::id<1> index) { @@ -893,9 +895,9 @@ sycl::event getri_batch(const char *func_name, Func 
func, sycl::queue &queue, st #define GETRI_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event getri_batch( \ - sycl::queue &queue, std::int64_t n, TYPE *a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, std::int64_t n, TYPE* a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return getri_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, n, a, lda, stride_a, ipiv, \ stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); \ } @@ -907,41 +909,41 @@ GETRI_BATCH_LAUNCHER_USM(std::complex, cublasZgetriBatched) #undef GETRI_BATCH_LAUNCHER_USM -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw 
unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } template -inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, T *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, + T* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, T* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, stride_ipiv, stride_b, 
batch_size, scratchpad_size); @@ -950,22 +952,22 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu // To get around the limitation. // Create new memory and convert 64-bit values. std::uint64_t ipiv_size = stride_ipiv * batch_size; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(done_casting); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end @@ -986,12 +988,12 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t *ipiv, std::int64_t stride_ipiv, TYPE *b, \ + sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t* ipiv, std::int64_t stride_ipiv, TYPE* b, \ std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, \ scratchpad_size, dependencies); \ @@ -1005,13 +1007,13 @@ GETRS_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - T **a, std::int64_t *lda, std::int64_t **ipiv, T **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + T** a, std::int64_t* lda, std::int64_t** ipiv, T** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1026,17 +1028,17 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu // an array of 64-bit ints in device memory. Each vec of ipiv // values need to be converted from 64-bit to 32-bit. The list // must stay on host. 
- int **ipiv32 = (int **)malloc(sizeof(int *) * batch_size); + int** ipiv32 = (int**)malloc(sizeof(int*) * batch_size); std::vector casting_dependencies(batch_size); int64_t global_id = 0; for (int64_t group_id = 0; group_id < group_count; ++group_id) { for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) { uint64_t ipiv_size = n[group_id]; - int *d_group_ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* d_group_ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); ipiv32[global_id] = d_group_ipiv32; - int64_t *d_group_ipiv = ipiv[global_id]; + int64_t* d_group_ipiv = ipiv[global_id]; - auto e = queue.submit([&](sycl::handler &cgh) { + auto e = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { d_group_ipiv32[index] = static_cast(d_group_ipiv[index]); }); @@ -1045,14 +1047,14 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu } } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(casting_dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; int64_t global_id = 0; @@ -1079,15 +1081,15 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu return done; } -#define GETRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrs_batch( \ - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, std::int64_t **ipiv, TYPE **b, std::int64_t *ldb, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE 
*scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ - return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ - ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ - dependencies); \ +#define GETRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event getrs_batch( \ + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, std::int64_t** ipiv, TYPE** b, std::int64_t* ldb, \ + std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ + return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ + ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ + dependencies); \ } GETRS_BATCH_LAUNCHER_USM(float, cusolverDnSgetrs) @@ -1098,22 +1100,22 @@ GETRS_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_BATCH_LAUNCHER_USM template -inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, - std::int64_t stride_a, T *tau, std::int64_t stride_tau, - std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, + std::int64_t stride_a, T* tau, std::int64_t stride_tau, + std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -1129,11 +1131,11 @@ inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &qu } #define ORGQR_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - TYPE *a, std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + TYPE* a, std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1145,25 +1147,25 @@ ORGQR_STRIDED_BATCH_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, - std::int64_t *lda, T **tau, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, T** a, + std::int64_t* lda, T** tau, std::int64_t group_count, + std::int64_t* group_sizes, T* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], k[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -1184,11 +1186,11 @@ inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &qu } #define ORGQR_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, \ - TYPE **a, std::int64_t *lda, TYPE **tau, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, \ + TYPE** a, std::int64_t* lda, TYPE** tau, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -1199,31 +1201,31 @@ ORGQR_BATCH_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_BATCH_LAUNCHER_USM template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue 
&queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev; cusolverStatus_t err; CUresult cuda_result; - auto *a_ = reinterpret_cast(a); + auto* a_ = reinterpret_cast(a); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto** a_dev_ = reinterpret_cast(a_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, a_dev_, (int)lda, nullptr, (int)batch_size); @@ -1237,10 +1239,10 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory 
#define POTRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ + sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, stride_a, \ batch_size, scratchpad, scratchpad_size, dependencies); \ } @@ -1253,11 +1255,11 @@ POTRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrfBatched) #undef POTRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1266,19 +1268,19 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu batch_size += group_sizes[i]; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; CUdeviceptr a_dev; CUresult cuda_result; cusolverStatus_t err; 
- CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto** a_dev_ = reinterpret_cast(a_dev); // Does not use scratch so call cuSolver asynchronously and sync at end for (int64_t i = 0; i < group_count; i++) { @@ -1298,13 +1300,13 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu } // Scratchpad memory not needed as parts of buffer a is used as workspace memory -#define POTRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrf_batch( \ - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ - return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, \ - group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ +#define POTRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event potrf_batch( \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, TYPE** a, \ + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ + return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, \ + group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } POTRF_BATCH_LAUNCHER_USM(float, cusolverDnSpotrfBatched) @@ -1315,12 +1317,12 @@ POTRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrfBatched) #undef POTRF_BATCH_LAUNCHER_USM template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, 
std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t stride_a, T *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t stride_a, T* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, stride_a, stride_b, batch_size, scratchpad_size); @@ -1329,26 +1331,26 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu if (nrhs != 1) throw unimplemented("lapack", "potrs_batch", "cusolver potrs_batch only supports nrhs = 1"); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUresult cuda_result; CUdeviceptr a_dev, b_dev; - auto *a_ = reinterpret_cast(a); - auto *b_ = reinterpret_cast(b); + auto* a_ = reinterpret_cast(a); + auto* b_ = reinterpret_cast(b); cusolverStatus_t err; // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - cuDataType **b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T *) * batch_size); + cuDataType** 
a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + cuDataType** b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); - auto **b_dev_ = reinterpret_cast(b_dev); + auto** a_dev_ = reinterpret_cast(a_dev); + auto** b_dev_ = reinterpret_cast(b_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, (int)nrhs, a_dev_, (int)lda, b_dev_, ldb, nullptr, @@ -1365,10 +1367,10 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, TYPE *b, std::int64_t ldb, std::int64_t stride_b, \ - std::int64_t batch_size, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, TYPE* b, std::int64_t ldb, std::int64_t stride_b, \ + std::int64_t batch_size, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, \ stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1382,12 +1384,12 @@ POTRS_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrsBatched) #undef POTRS_STRIDED_BATCH_LAUNCHER_USM 
template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, T **a, - std::int64_t *lda, T **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a, + std::int64_t* lda, T** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1401,29 +1403,29 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu "cusolver potrs_batch only supports nrhs = 1"); } - int *info = (int *)malloc_device(sizeof(int *) * batch_size, queue); - T **a_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); - T **b_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); + int* info = (int*)malloc_device(sizeof(int*) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); + T** b_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy_a = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); auto done_cpy_b = - queue.submit([&](sycl::handler &h) { h.memcpy(b_dev, b, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(b_dev, b, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(done_cpy_a); cgh.depends_on(done_cpy_b); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + 
onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto **b_ = reinterpret_cast(b_dev); - auto info_ = reinterpret_cast(info); + auto** a_ = reinterpret_cast(a_dev); + auto** b_ = reinterpret_cast(b_dev); + auto info_ = reinterpret_cast(info); CUSOLVER_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(uplo[i]), (int)n[i], (int)nrhs[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], info_, (int)group_sizes[i]); @@ -1440,10 +1442,10 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, TYPE **b, std::int64_t *ldb, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, \ ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ dependencies); \ @@ -1457,22 +1459,22 @@ POTRS_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrsBatched) #undef POTRS_BATCH_LAUNCHER_USM template -inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, 
std::int64_t lda, - std::int64_t stride_a, T *tau, std::int64_t stride_tau, - std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, + std::int64_t stride_a, T* tau, std::int64_t stride_tau, + std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -1488,11 +1490,11 @@ inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &qu } #define UNGQR_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - TYPE *a, std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + TYPE* a, std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { 
\ + const std::vector& dependencies) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1504,25 +1506,25 @@ UNGQR_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZungqr) #undef UNGQR_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, - std::int64_t *lda, T **tau, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event ungqr_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, T** a, + std::int64_t* lda, T** tau, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], k[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -1543,11 +1545,11 @@ inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &qu } #define UNGQR_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, 
std::int64_t *n, std::int64_t *k, \ - TYPE **a, std::int64_t *lda, TYPE **tau, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, \ + TYPE** a, std::int64_t* lda, TYPE** tau, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -1560,12 +1562,12 @@ UNGQR_BATCH_LAUNCHER_USM(std::complex, cusolverDnZungqr) // BATCH SCRATCHPAD API template -inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, - std::int64_t batch_size, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + std::int64_t batch_size, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1611,13 +1613,13 @@ GETRI_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #undef GETRI_STRIDED_BATCH_LAUNCHER_SCRATCH // cusolverDnXgetrs does not use scratchpad memory -#define GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t getrs_batch_scratchpad_size( \ +#define GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t getrs_batch_scratchpad_size( \ sycl::queue & queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, \ - std::int64_t lda, std::int64_t stride_a, 
std::int64_t stride_ipiv, std::int64_t ldb, \ - std::int64_t stride_b, std::int64_t batch_size) { \ - return 0; \ + std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, \ + std::int64_t stride_b, std::int64_t batch_size) { \ + return 0; \ } GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(float) @@ -1628,12 +1630,12 @@ GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #undef GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, - std::int64_t batch_size, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + std::int64_t batch_size, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1662,12 +1664,12 @@ GEQRF_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex, cusolverDnZgeqrf_buff #undef GEQRF_STRIDED_BATCH_LAUNCHER_SCRATCH // cusolverDnXpotrfBatched does not use scratchpad memory -#define POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrf_batch_scratchpad_size( \ +#define POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ sycl::queue & queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, \ - std::int64_t stride_a, std::int64_t batch_size) { \ - return 0; \ + std::int64_t stride_a, std::int64_t batch_size) { \ + return 0; \ } POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(float) @@ -1681,7 +1683,7 @@ POTRF_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #define POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH(TYPE) \ template <> \ std::int64_t 
potrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ + sycl::queue & queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, \ std::int64_t batch_size) { \ return 0; \ @@ -1695,13 +1697,13 @@ POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #undef POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1729,13 +1731,13 @@ ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ 
-1763,12 +1765,12 @@ ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex, cusolverDnZungqr_buff #undef ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1789,8 +1791,8 @@ inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl:: #define GETRF_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t getrf_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ int scratch_size; \ getrf_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, lda, \ group_count, group_sizes, &scratch_size); \ @@ -1804,18 +1806,18 @@ GETRF_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnZgetrf_bufferSize) #undef GETRF_GROUP_LAUNCHER_SCRATCH -#define GETRI_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t getri_batch_scratchpad_size(sycl::queue & queue, std::int64_t * n, \ - std::int64_t * lda, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - std::int64_t max_scratch_sz = 0; \ - 
for (auto group_id = 0; group_id < group_count; ++group_id) { \ - auto scratch_sz = lda[group_id] * n[group_id]; \ - if (scratch_sz > max_scratch_sz) \ - max_scratch_sz = scratch_sz; \ - } \ - return max_scratch_sz; \ +#define GETRI_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t getri_batch_scratchpad_size(sycl::queue & queue, std::int64_t* n, \ + std::int64_t* lda, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + std::int64_t max_scratch_sz = 0; \ + for (auto group_id = 0; group_id < group_count; ++group_id) { \ + auto scratch_sz = lda[group_id] * n[group_id]; \ + if (scratch_sz > max_scratch_sz) \ + max_scratch_sz = scratch_sz; \ + } \ + return max_scratch_sz; \ } GETRI_GROUP_LAUNCHER_SCRATCH(float) @@ -1825,13 +1827,13 @@ GETRI_GROUP_LAUNCHER_SCRATCH(std::complex) #undef GETRI_GROUP_LAUNCHER_SCRATCH -#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t getrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::transpose * trans, std::int64_t * n, \ - std::int64_t * nrhs, std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t getrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::transpose * trans, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } GETRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1842,12 +1844,12 @@ GETRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef GETRS_GROUP_LAUNCHER_SCRATCH template -inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { 
+inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1868,8 +1870,8 @@ inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl:: #define GEQRF_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t geqrf_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ int scratch_size; \ geqrf_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, lda, \ group_count, group_sizes, &scratch_size); \ @@ -1884,12 +1886,12 @@ GEQRF_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnZgeqrf_bufferSize) #undef GEQRF_GROUP_LAUNCHER_SCRATCH template -inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1908,15 +1910,15 @@ inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl:: e.wait(); } -#define ORGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t orgqr_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * k, \ - std::int64_t * lda, std::int64_t group_count, std::int64_t * group_sizes) { \ - int scratch_size; \ - orgqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ - group_count, group_sizes, &scratch_size); \ - return scratch_size; \ +#define ORGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t orgqr_batch_scratchpad_size( \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + int scratch_size; \ + orgqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ + group_count, group_sizes, &scratch_size); \ + return scratch_size; \ } ORGQR_GROUP_LAUNCHER_SCRATCH(float, cusolverDnSorgqr_bufferSize) @@ -1925,12 +1927,12 @@ ORGQR_GROUP_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_GROUP_LAUNCHER_SCRATCH // cusolverDnXpotrfBatched does not use scratchpad memory -#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrf_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ - return 0; \ +#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + return 0; \ } POTRF_GROUP_LAUNCHER_SCRATCH(float) @@ -1941,13 +1943,13 @@ 
POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRF_GROUP_LAUNCHER_SCRATCH // cusolverDnXpotrsBatched does not use scratchpad memory -#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t * n, std::int64_t * nrhs, \ - std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } POTRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1958,12 +1960,12 @@ POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRS_GROUP_LAUNCHER_SCRATCH template -inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1982,15 +1984,15 @@ inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl:: e.wait(); } -#define UNGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t ungqr_batch_scratchpad_size( \ - 
sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * k, \ - std::int64_t * lda, std::int64_t group_count, std::int64_t * group_sizes) { \ - int scratch_size; \ - ungqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ - group_count, group_sizes, &scratch_size); \ - return scratch_size; \ +#define UNGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t ungqr_batch_scratchpad_size( \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + int scratch_size; \ + ungqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ + group_count, group_sizes, &scratch_size); \ + return scratch_size; \ } UNGQR_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnCungqr_bufferSize) diff --git a/src/lapack/backends/cusolver/cusolver_handle.hpp b/src/lapack/backends/cusolver/cusolver_handle.hpp index 269295cd5..53be426f5 100644 --- a/src/lapack/backends/cusolver/cusolver_handle.hpp +++ b/src/lapack/backends/cusolver/cusolver_handle.hpp @@ -28,10 +28,10 @@ namespace cusolver { template struct cusolver_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t cusolver_handle_mapper_{}; ~cusolver_handle() noexcept(false) { - for (auto &handle_pair : cusolver_handle_mapper_) { + for (auto& handle_pair : cusolver_handle_mapper_) { cusolverStatus_t err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/lapack/backends/cusolver/cusolver_helper.hpp b/src/lapack/backends/cusolver/cusolver_helper.hpp index f51827f9f..52da00bb6 100644 --- a/src/lapack/backends/cusolver/cusolver_helper.hpp +++ b/src/lapack/backends/cusolver/cusolver_helper.hpp @@ -82,7 +82,7 @@ void overflow_check(Index index, Next... 
indices) { class cusolver_error : virtual public std::runtime_error { protected: - inline const char *cusolver_error_map(cusolverStatus_t error) { + inline const char* cusolver_error_map(cusolverStatus_t error) { switch (error) { case CUSOLVER_STATUS_SUCCESS: return "CUSOLVER_STATUS_SUCCESS"; @@ -131,7 +131,7 @@ class cusolver_error : virtual public std::runtime_error { class cuda_error : virtual public std::runtime_error { protected: - inline const char *cuda_error_map(CUresult result) { + inline const char* cuda_error_map(CUresult result) { switch (result) { case CUDA_SUCCESS: return "CUDA_SUCCESS"; case CUDA_ERROR_NOT_PERMITTED: return "CUDA_ERROR_NOT_PERMITTED"; @@ -201,9 +201,8 @@ class cuda_error : virtual public std::runtime_error { CUSOLVER_SYNC(err, handle) template -inline void cusolver_native_named_func(const char *func_name, Func func, - cusolverStatus_t err, - cusolverDnHandle_t handle, Types... args){ +inline void cusolver_native_named_func(const char* func_name, Func func, cusolverStatus_t err, + cusolverDnHandle_t handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUSOLVER_ERROR_FUNC_T(func_name, func, err, handle, args...) 
#else @@ -291,25 +290,25 @@ struct CudaEquivalentType> { /* devinfo */ -inline void get_cusolver_devinfo(sycl::queue &queue, sycl::buffer &devInfo, - std::vector &dev_info_) { +inline void get_cusolver_devinfo(sycl::queue& queue, sycl::buffer& devInfo, + std::vector& dev_info_) { sycl::host_accessor dev_info_acc{ devInfo }; for (unsigned int i = 0; i < dev_info_.size(); ++i) dev_info_[i] = dev_info_acc[i]; } -inline void get_cusolver_devinfo(sycl::queue &queue, const int *devInfo, - std::vector &dev_info_) { +inline void get_cusolver_devinfo(sycl::queue& queue, const int* devInfo, + std::vector& dev_info_) { queue.wait(); queue.memcpy(dev_info_.data(), devInfo, sizeof(int)); } template -inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char *func_name, - const char *cufunc_name, int dev_info_size = 1) { +inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char* func_name, + const char* cufunc_name, int dev_info_size = 1) { std::vector dev_info_(dev_info_size); get_cusolver_devinfo(queue, devinfo, dev_info_); - for (const auto &val : dev_info_) { + for (const auto& val : dev_info_) { if (val > 0) throw oneapi::math::lapack::computation_error( func_name, std::string(cufunc_name) + " failed with info = " + std::to_string(val), @@ -322,8 +321,8 @@ inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char // Creates list of matrix/vector pointers from initial ptr and stride // Note: user is responsible for deallocating memory template -T **create_ptr_list_from_stride(T *ptr, int64_t ptr_stride, int64_t batch_size) { - T **ptr_list = (T **)malloc(sizeof(T *) * batch_size); +T** create_ptr_list_from_stride(T* ptr, int64_t ptr_stride, int64_t batch_size) { + T** ptr_list = (T**)malloc(sizeof(T*) * batch_size); for (int64_t i = 0; i < batch_size; i++) ptr_list[i] = ptr + i * ptr_stride; diff --git a/src/lapack/backends/cusolver/cusolver_lapack.cpp b/src/lapack/backends/cusolver/cusolver_lapack.cpp index 
52d28edc3..6a5427712 100644 --- a/src/lapack/backends/cusolver/cusolver_lapack.cpp +++ b/src/lapack/backends/cusolver/cusolver_lapack.cpp @@ -30,10 +30,10 @@ namespace cusolver { // BUFFER APIs template -inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -41,21 +41,21 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int if (m < n) throw unimplemented("lapack", "gebrd", "cusolver gebrd does not support m < n"); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tauq_acc = tauq.template get_access(cgh); auto taup_acc = taup.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tauq_ = sc.get_mem(tauq_acc); - auto taup_ = sc.get_mem(taup_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tauq_ = 
sc.get_mem(tauq_acc); + auto taup_ = sc.get_mem(taup_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_, scratch_, scratchpad_size, nullptr); @@ -64,10 +64,10 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEBRD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tauq, sycl::buffer &taup, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tauq, sycl::buffer& taup, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ gebrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size); \ } @@ -79,43 +79,43 @@ GEBRD_LAUNCHER(std::complex, double, cusolverDnZgebrd) #undef GEBRD_LAUNCHER -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> 
&scratchpad, std::int64_t scratchpad_size) { +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } template -inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -124,8 +124,8 @@ inline 
void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEQRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ geqrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -139,9 +139,9 @@ GEQRF_LAUNCHER(std::complex, cusolverDnZgeqrf) #undef GEQRF_LAUNCHER template -void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -152,17 +152,17 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = 
sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, scratch_, ipiv32_, devInfo_); @@ -170,7 +170,7 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -181,8 +181,8 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, } #define GETRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ getrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -196,8 +196,8 @@ GETRF_LAUNCHER(std::complex, cusolverDnZgetrf) #undef GETRF_LAUNCHER #define GETRI_LAUNCHER(TYPE) \ - void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ return getri_batch(queue, n, a, lda, lda * n, ipiv, n, 1, scratchpad, scratchpad_size); \ } @@ -211,10 +211,10 @@ GETRI_LAUNCHER(std::complex) // cusolverDnXgetrs does not use scratchpad memory template -inline void getrs(const char *func_name, Func func, sycl::queue &queue, 
+inline void getrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb); @@ -225,7 +225,7 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = ipiv.size(); sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -233,15 +233,15 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb, nullptr); @@ -250,10 +250,10 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrs(sycl::queue &queue, oneapi::math::transpose 
trans, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ getrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -266,30 +266,30 @@ GETRS_LAUNCHER(std::complex, cusolverDnZgetrs) #undef GETRS_LAUNCHER template -inline void gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, m, lda, ldu, ldvt, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto s_acc = s.template get_access(cgh); auto u_acc = u.template get_access(cgh); auto vt_acc = vt.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + 
onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto s_ = sc.get_mem(s_acc); - auto u_ = sc.get_mem(u_acc); - auto vt_ = sc.get_mem(vt_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto s_ = sc.get_mem(s_acc); + auto u_ = sc.get_mem(u_acc); + auto vt_ = sc.get_mem(vt_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // rwork is set to nullptr. If set it is filled with information from the superdiagonal. cusolver_native_named_func(func_name, func, err, handle, get_cusolver_jobsvd(jobu), @@ -301,10 +301,10 @@ inline void gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define GESVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, \ - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, \ + void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, \ + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ gesvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, \ vt, ldvt, scratchpad, scratchpad_size); \ @@ -318,25 +318,25 @@ GESVD_LAUNCHER(std::complex, double, cusolverDnZgesvd) #undef GESVD_LAUNCHER template -inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +inline void heevd(const char* 
func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -346,12 +346,12 @@ inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ - scratchpad_size); \ +#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ + void heevd(sycl::queue& queue, oneapi::math::job jobz, 
oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ + scratchpad_size); \ } HEEVD_LAUNCHER(std::complex, float, cusolverDnCheevd) @@ -360,28 +360,28 @@ HEEVD_LAUNCHER(std::complex, double, cusolverDnZheevd) #undef HEEVD_LAUNCHER template -inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, +inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = 
sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -392,10 +392,10 @@ inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define HEGVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hegvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ scratchpad, scratchpad_size); \ } @@ -406,29 +406,29 @@ HEGVD_LAUNCHER(std::complex, double, cusolverDnZhegvd) #undef HEGVD_LAUNCHER template -inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = 
e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -438,10 +438,10 @@ inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HETRD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, \ - sycl::buffer &e, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, \ + sycl::buffer& e, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hetrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ scratchpad_size); \ } @@ -451,34 +451,34 @@ HETRD_LAUNCHER(std::complex, double, cusolverDnZhetrd) #undef HETRD_LAUNCHER -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t 
scratchpad_size) { +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } template -inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), 
m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -487,9 +487,9 @@ inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -500,20 +500,20 @@ ORGBR_LAUNCHER(double, cusolverDnDorgbr) #undef ORGBR_LAUNCHER template -inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = 
sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -522,9 +522,9 @@ inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define ORGQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -535,20 +535,20 @@ ORGQR_LAUNCHER(double, cusolverDnDorgqr) #undef ORGQR_LAUNCHER template -inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = 
sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -556,12 +556,12 @@ inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); } -#define ORGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ - scratchpad_size); \ +#define ORGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ + scratchpad_size); \ } ORGTR_LAUNCHER(float, cusolverDnSorgtr) @@ -570,24 +570,24 @@ ORGTR_LAUNCHER(double, cusolverDnDorgtr) #undef ORGTR_LAUNCHER template -inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - 
queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -598,10 +598,10 @@ inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ormtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ @@ -612,37 +612,37 @@ ORMTR_LAUNCHER(double, cusolverDnDormtr) #undef ORMTR_LAUNCHER -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, 
sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } template -inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template 
get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -652,10 +652,10 @@ inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -666,21 +666,21 @@ ORMQR_LAUNCHER(double, cusolverDnDormqr) #undef ORMQR_LAUNCHER template -inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& 
a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -689,11 +689,11 @@ inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define POTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ - scratchpad_size); \ +#define POTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ + scratchpad_size); \ } POTRF_LAUNCHER(float, cusolverDnSpotrf) @@ -704,21 +704,21 @@ POTRF_LAUNCHER(std::complex, cusolverDnZpotrf) #undef POTRF_LAUNCHER template -inline void 
potri(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -727,11 +727,11 @@ inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define POTRI_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ - scratchpad_size); \ +#define POTRI_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { \ + potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ + scratchpad_size); \ } POTRI_LAUNCHER(float, cusolverDnSpotri) @@ -743,19 +743,19 @@ POTRI_LAUNCHER(std::complex, cusolverDnZpotri) // cusolverDnXpotrs does not use scratchpad memory template -inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb, nullptr); @@ -764,9 +764,9 @@ inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t 
n, std::int64_t nrhs, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -779,23 +779,23 @@ POTRS_LAUNCHER(std::complex, cusolverDnZpotrs) #undef POTRS_LAUNCHER template -inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -805,12 +805,12 @@ inline void syevd(const char *func_name, Func 
func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define SYEVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ - scratchpad_size); \ +#define SYEVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ + scratchpad_size); \ } SYEVD_LAUNCHER(float, cusolverDnSsyevd) @@ -819,26 +819,26 @@ SYEVD_LAUNCHER(double, cusolverDnDsyevd) #undef SYEVD_LAUNCHER template -inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); 
auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -849,10 +849,10 @@ inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define SYGVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ scratchpad, scratchpad_size); \ } @@ -863,28 +863,28 @@ SYGVD_LAUNCHER(double, cusolverDnDsygvd) #undef SYGVD_LAUNCH template -inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void sytrd(const char* func_name, Func func, 
sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -893,13 +893,13 @@ inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define SYTRD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, 
lda, d, e, tau, scratchpad, \ - scratchpad_size); \ +#define SYTRD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ + scratchpad_size); \ } SYTRD_LAUNCHER(float, cusolverDnSsytrd) @@ -908,9 +908,9 @@ SYTRD_LAUNCHER(double, cusolverDnDsytrd) #undef SYTRD_LAUNCHER template -inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); @@ -922,17 +922,17 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: std::uint64_t ipiv_size = n; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = 
sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, ipiv32_, scratch_, scratchpad_size, devInfo_); @@ -940,7 +940,7 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -951,8 +951,8 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -965,49 +965,49 @@ SYTRF_LAUNCHER(std::complex, cusolverDnZsytrf) #undef SYTRF_LAUNCHER -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, 
oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } template -inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void ungbr(const char* func_name, Func 
func, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1016,9 +1016,9 @@ inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1029,20 +1029,20 @@ UNGBR_LAUNCHER(std::complex, cusolverDnZungbr) #undef UNGBR_LAUNCHER template -inline void ungqr(const char *func_name, 
Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1051,9 +1051,9 @@ inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define UNGQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1064,20 
+1064,20 @@ UNGQR_LAUNCHER(std::complex, cusolverDnZungqr) #undef UNGQR_LAUNCHER template -inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1085,12 +1085,12 @@ inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); } -#define UNGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ - scratchpad_size); \ +#define UNGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ + void ungtr(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ + scratchpad_size); \ } UNGTR_LAUNCHER(std::complex, cusolverDnCungtr) @@ -1098,39 +1098,39 @@ UNGTR_LAUNCHER(std::complex, cusolverDnZungtr) #undef UNGTR_LAUNCHER -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } template -inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -1140,10 +1140,10 @@ inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { \ unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -1154,24 +1154,24 @@ UNMQR_LAUNCHER(std::complex, cusolverDnZunmqr) #undef UNMQR_LAUNCHER template -inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -1182,10 +1182,10 @@ inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMTR_LAUNCHER(TYPE, 
CUSOLVER_ROUTINE) \ - void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ unmtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ @@ -1199,10 +1199,10 @@ UNMTR_LAUNCHER(std::complex, cusolverDnZunmtr) // USM APIs template -inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *d, T_B *e, T_A *tauq, - T_A *taup, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* d, T_B* e, T_A* tauq, + T_A* taup, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -1210,19 +1210,19 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s if (m < n) throw unimplemented("lapack", "gebrd", "cusolver gebrd does not support m < n"); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tauq_ = reinterpret_cast(tauq); - auto taup_ = reinterpret_cast(taup); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tauq_ = reinterpret_cast(tauq); + auto taup_ = reinterpret_cast(taup); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_, scratch_, scratchpad_size, nullptr); @@ -1232,10 +1232,10 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s } #define GEBRD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tauq, TYPE_A *taup, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tauq, TYPE_A* taup, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gebrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1247,44 +1247,44 @@ GEBRD_LAUNCHER_USM(std::complex, double, cusolverDnZgebrd) #undef GEBRD_LAUNCHER_USM -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } template -inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, +inline sycl::event geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename 
CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1294,9 +1294,9 @@ inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, s } #define GEQRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return geqrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1309,10 +1309,10 @@ GEQRF_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_LAUNCHER_USM template -inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, +inline sycl::event getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t* ipiv, 
T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -1320,20 +1320,20 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = std::min(n, m); - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, scratch_, ipiv_, devInfo_); @@ -1341,7 +1341,7 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ 
-1358,10 +1358,10 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s } #define GETRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ + sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1374,9 +1374,9 @@ GETRF_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRF_LAUNCHER_USM #define GETRI_LAUNCHER_USM(TYPE) \ - sycl::event getri(sycl::queue &queue, std::int64_t n, TYPE *a, std::int64_t lda, \ - std::int64_t *ipiv, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event getri(sycl::queue& queue, std::int64_t n, TYPE* a, std::int64_t lda, \ + std::int64_t* ipiv, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getri_batch(queue, n, a, lda, lda * n, ipiv, n, 1, scratchpad, scratchpad_size, \ dependencies); \ } @@ -1390,11 +1390,11 @@ GETRI_LAUNCHER_USM(std::complex) // cusolverDnXgetrs does not use scratchpad memory template -inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t *ipiv, T *b, std::int64_t ldb, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb, + T* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); @@ -1402,25 +1402,25 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, // To get around the limitation. // Create new buffer and convert 64-bit values. std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_casting); - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb, nullptr); @@ -1435,10 +1435,10 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t *ipiv, TYPE *b, \ - std::int64_t ldb, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + 
sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \ + std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, \ ldb, scratchpad, scratchpad_size, dependencies); \ } @@ -1451,28 +1451,28 @@ GETRS_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_LAUNCHER_USM template -inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *s, T_A *u, std::int64_t ldu, - T_A *vt, std::int64_t ldvt, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu, + T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldu, ldvt, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto s_ = reinterpret_cast(s); - auto u_ = reinterpret_cast(u); - auto vt_ = reinterpret_cast(vt); - auto 
devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto s_ = reinterpret_cast(s); + auto u_ = reinterpret_cast(u); + auto vt_ = reinterpret_cast(vt); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // rwork is set to nullptr. If set it is filled with information from the superdiagonal. cusolver_native_named_func(func_name, func, err, handle, get_cusolver_jobsvd(jobu), @@ -1486,11 +1486,11 @@ inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, } #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *s, \ - TYPE_A *u, std::int64_t ldu, TYPE_A *vt, std::int64_t ldvt, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s, \ + TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gesvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, \ ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); \ } @@ -1503,25 +1503,25 @@ GESVD_LAUNCHER_USM(std::complex, double, cusolverDnZgesvd) #undef GESVD_LAUNCHER_USM template -inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_B *&w, T_A *&scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::job jobz, 
oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1534,10 +1534,10 @@ inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, } #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ - std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return heevd(#CUSOLVER_ROUTINE, 
CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1548,27 +1548,27 @@ HEEVD_LAUNCHER_USM(std::complex, double, cusolverDnZheevd) #undef HEEVD_LAUNCHER_USM template -inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_A *&b, std::int64_t ldb, T_B *&w, T_A *&scratchpad, +inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, 
get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -1581,11 +1581,11 @@ inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, s } #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, TYPE_A *a, std::int64_t lda, \ - TYPE_A *b, std::int64_t ldb, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda, \ + TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hegvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \ ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -1596,27 +1596,27 @@ HEGVD_LAUNCHER_USM(std::complex, double, cusolverDnZhegvd) #undef HEGVD_LAUNCHER_USM template -inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T_A *a, std::int64_t lda, T_B *d, - T_B *e, T_A *tau, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d, + T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -1628,10 +1628,10 @@ inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, } #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tau, TYPE_A *scratchpad, \ + sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hetrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1641,37 +1641,37 @@ HETRD_LAUNCHER_USM(std::complex, double, cusolverDnZhetrd) #undef HETRD_LAUNCHER_USM -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } template -inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = 
reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1681,10 +1681,10 @@ inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, } #define ORGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1695,22 +1695,22 @@ ORGBR_LAUNCHER_USM(double, cusolverDnDorgbr) #undef ORGBR_LAUNCHER_USM template -inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + 
onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1720,9 +1720,9 @@ inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, s } #define ORGQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1733,22 +1733,22 @@ ORGQR_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_LAUNCHER_USM template -inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { 
int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1758,9 +1758,9 @@ inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, } #define ORGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1771,25 +1771,25 @@ ORGTR_LAUNCHER_USM(double, cusolverDnDorgtr) #undef ORGTR_LAUNCHER_USM template -inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, 
T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -1801,11 +1801,11 @@ inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, } #define ORMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, \ tau, c, ldc, scratchpad, 
scratchpad_size, dependencies); \ } @@ -1815,38 +1815,38 @@ ORMTR_LAUNCHER_USM(double, cusolverDnDormtr) #undef ORMTR_LAUNCHER_USM -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } template -inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -1856,14 +1856,14 @@ inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, return done; } -#define ORMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ - tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ +#define ORMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& 
dependencies) { \ + return ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ + tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } ORMQR_LAUNCHER_USM(float, cusolverDnSormqr) @@ -1872,23 +1872,23 @@ ORMQR_LAUNCHER_USM(double, cusolverDnDormqr) #undef ORMQR_LAUNCHER_USM template -inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -1900,9 +1900,9 @@ inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, } #define POTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event 
potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1915,23 +1915,23 @@ POTRF_LAUNCHER_USM(std::complex, cusolverDnZpotrf) #undef POTRF_LAUNCHER_USM template -inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; 
cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -1943,9 +1943,9 @@ inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, } #define POTRI_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1959,22 +1959,22 @@ POTRI_LAUNCHER_USM(std::complex, cusolverDnZpotri) // cusolverDnXpotrs does not use scratchpad memory template -inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, +inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); 
- auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb, nullptr); @@ -1984,10 +1984,10 @@ inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, } #define POTRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, TYPE *b, std::int64_t ldb, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2000,24 +2000,24 @@ POTRS_LAUNCHER_USM(std::complex, cusolverDnZpotrs) #undef POTRS_LAUNCHER_USM template -inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *w, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -2030,10 +2030,10 @@ inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, } #define SYEVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ - std::int64_t n, TYPE *a, std::int64_t lda, TYPE *w, TYPE *scratchpad, \ + sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2044,26 +2044,26 @@ SYEVD_LAUNCHER_USM(double, cusolverDnDsyevd) #undef SYEVD_LAUNCHER_USM template -inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *w, T *scratchpad, +inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t 
itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -2075,13 +2075,13 @@ inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, s return done; } -#define SYGVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, TYPE *a, std::int64_t lda, TYPE *b, \ - std::int64_t ldb, TYPE *w, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \ - ldb, 
w, scratchpad, scratchpad_size, dependencies); \ +#define SYGVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \ + std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \ + ldb, w, scratchpad, scratchpad_size, dependencies); \ } SYGVD_LAUNCHER_USM(float, cusolverDnSsygvd) @@ -2090,26 +2090,26 @@ SYGVD_LAUNCHER_USM(double, cusolverDnDsygvd) #undef SYGVD_LAUNCHER_USM template -inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *d, T *e, - T *tau, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, + T* e, T* tau, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); - auto devInfo_ = 
reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -2121,10 +2121,10 @@ inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, } #define SYTRD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *d, TYPE *e, TYPE *tau, TYPE *scratchpad, \ + sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2135,31 +2135,31 @@ SYTRD_LAUNCHER_USM(double, cusolverDnDsytrd) #undef SYTRD_LAUNCHER_USM template -inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); + int* devInfo = 
(int*)malloc_device(sizeof(int), queue); // cuSolver legacy api does not accept 64-bit ints. // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, ipiv_, scratch_, scratchpad_size, devInfo_); @@ -2167,7 +2167,7 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -2183,13 +2183,13 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, return done_casting; } -#define SYTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE 
*scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ - scratchpad, scratchpad_size, dependencies); \ +#define SYTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ + scratchpad, scratchpad_size, dependencies); \ } SYTRF_LAUNCHER_USM(float, cusolverDnSsytrf) @@ -2199,51 +2199,51 @@ SYTRF_LAUNCHER_USM(std::complex, cusolverDnZsytrf) #undef SYTRF_LAUNCHER_USM -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* 
scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } template -inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2253,10 +2253,10 @@ inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, } #define UNGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2267,22 +2267,22 @@ UNGBR_LAUNCHER_USM(std::complex, cusolverDnZungbr) #undef UNGBR_LAUNCHER_USM template -inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungqr(const char* func_name, Func func, sycl::queue& queue, 
std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2292,9 +2292,9 @@ inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, s } #define UNGQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -2305,22 +2305,22 @@ UNGQR_LAUNCHER_USM(std::complex, cusolverDnZungqr) #undef UNGQR_LAUNCHER_USM template -inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, - 
oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2330,9 +2330,9 @@ inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, } #define UNGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ 
scratchpad_size, dependencies); \ } @@ -2342,40 +2342,40 @@ UNGTR_LAUNCHER_USM(std::complex, cusolverDnZungtr) #undef UNGTR_LAUNCHER_USM -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } template -inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, 
std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -2385,14 +2385,14 @@ inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, return done; } -#define UNMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ - tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ +#define UNMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ + sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* 
tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ + tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } UNMQR_LAUNCHER_USM(std::complex, cusolverDnCunmqr) @@ -2401,25 +2401,25 @@ UNMQR_LAUNCHER_USM(std::complex, cusolverDnZunmqr) #undef UNMQR_LAUNCHER_USM template -inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), 
get_cublas_operation(trans), m, @@ -2431,11 +2431,11 @@ inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, } #define UNMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return unmtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2448,16 +2448,18 @@ UNMTR_LAUNCHER_USM(std::complex, cusolverDnZunmtr) // SCRATCHPAD APIs template -inline void gebrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void gebrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); + }); + }) + .wait(); } #define GEBRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2478,37 +2480,40 @@ 
GEBRD_LAUNCHER_SCRATCH(std::complex, cusolverDnZgebrd_bufferSize) #undef GEBRD_LAUNCHER_SCRATCH template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template -inline void geqrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void geqrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, + scratch_size); + 
}); + }) + .wait(); } #define GEQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2529,24 +2534,26 @@ GEQRF_LAUNCHER_SCRATCH(std::complex, cusolverDnZgeqrf_bufferSize) #undef GEQRF_LAUNCHER_SCRATCH template -inline void gesvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void gesvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, - std::int64_t ldu, std::int64_t ldvt, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); - }); - }).wait(); + std::int64_t ldu, std::int64_t ldvt, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); + }); + }) + .wait(); } #define GESVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t gesvd_scratchpad_size( \ - sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, \ - std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \ + sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \ int scratch_size; \ gesvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, lda, \ ldu, ldvt, &scratch_size); \ @@ -2561,16 +2568,19 @@ GESVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZgesvd_bufferSize) #undef GESVD_LAUNCHER_SCRATCH template -inline void getrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, 
+inline void getrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, + scratch_size); + }); + }) + .wait(); } #define GETRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2607,7 +2617,7 @@ GETRI_LAUNCHER_SCRATCH(std::complex) // cusolverDnXgetrs does not use scratchpad memory #define GETRS_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t getrs_scratchpad_size(sycl::queue & queue, oneapi::math::transpose trans, \ + std::int64_t getrs_scratchpad_size(sycl::queue & queue, oneapi::math::transpose trans, \ std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ std::int64_t ldb) { \ return 0; \ @@ -2621,24 +2631,26 @@ GETRS_LAUNCHER_SCRATCH(std::complex) #undef GETRS_LAUNCHER_SCRATCH template -inline void heevd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void heevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), - get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, - 
scratch_size); - }); - }).wait(); + std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), + get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, + scratch_size); + }); + }) + .wait(); } #define HEEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ - std::int64_t heevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t heevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, \ std::int64_t lda) { \ int scratch_size; \ heevd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, lda, \ @@ -2652,25 +2664,27 @@ HEEVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZheevd_bufferSize) #undef HEEVD_LAUNCHER_SCRATCH template -inline void hegvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, std::int64_t ldb, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), - get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, nullptr, - lda, nullptr, ldb, nullptr, scratch_size); - }); - }).wait(); +inline void hegvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), + get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, + nullptr, lda, nullptr, ldb, nullptr, scratch_size); + }); + }) + .wait(); } #define HEGVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t hegvd_scratchpad_size(sycl::queue & queue, std::int64_t itype, \ - oneapi::math::job jobz, oneapi::math::uplo uplo, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ int scratch_size; \ hegvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, \ @@ -2684,27 +2698,30 @@ HEGVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZhegvd_bufferSize) #undef HEGVD_LAUNCHER_SCRATCH template -inline void hetrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void hetrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, nullptr, nullptr, scratch_size); - }); - }).wait(); -} - -#define HETRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, nullptr, nullptr, + scratch_size); + }); + }) + .wait(); +} + +#define HETRD_LAUNCHER_SCRATCH(TYPE, 
CUSOLVER_ROUTINE) \ + template <> \ std::int64_t hetrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - hetrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + hetrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } HETRD_LAUNCHER_SCRATCH(std::complex, cusolverDnChetrd_bufferSize) @@ -2713,39 +2730,42 @@ HETRD_LAUNCHER_SCRATCH(std::complex, cusolverDnZhetrd_bufferSize) #undef HETRD_LAUNCHER_SCRATCH template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } template -inline void orgbr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgbr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); -} - -#define ORGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + 
std::int64_t k, std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), + m, n, k, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); +} + +#define ORGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t orgbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ - std::int64_t m, std::int64_t n, std::int64_t k, \ - std::int64_t lda) { \ - int scratch_size; \ - orgbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + int scratch_size; \ + orgbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \ + &scratch_size); \ + return scratch_size; \ } ORGBR_LAUNCHER_SCRATCH(float, cusolverDnSorgbr_bufferSize) @@ -2754,27 +2774,29 @@ ORGBR_LAUNCHER_SCRATCH(double, cusolverDnDorgbr_bufferSize) #undef ORGBR_LAUNCHER_SCRATCH template -inline void orgtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); -} - -#define ORGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); +} + +#define ORGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t orgtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - orgtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + orgtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } ORGTR_LAUNCHER_SCRATCH(float, cusolverDnSorgtr_bufferSize) @@ -2783,17 +2805,19 @@ ORGTR_LAUNCHER_SCRATCH(double, cusolverDnDorgtr_bufferSize) #undef ORGTR_LAUNCHER_SCRATCH template -inline void orgqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, + nullptr, scratch_size); + }); + }) + .wait(); } #define ORGQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2812,14 +2836,14 @@ 
ORGQR_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_LAUNCHER_SCRATCH template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "ormrq_scratchpad_size"); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2827,30 +2851,32 @@ std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::sid } template -inline void ormqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ormqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_operation(trans), m, n, k, nullptr, lda, nullptr, - nullptr, ldc, scratch_size); - }); - }).wait(); -} - -#define ORMQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t ormqr_scratchpad_size( \ - sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, \ - std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ - int scratch_size; \ - ormqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \ - lda, ldc, &scratch_size); \ - 
return scratch_size; \ + std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_operation(trans), m, n, k, nullptr, lda, + nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); +} + +#define ORMQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t ormqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + int scratch_size; \ + ormqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \ + lda, ldc, &scratch_size); \ + return scratch_size; \ } ORMQRF_LAUNCHER_SCRATCH(float, cusolverDnSormqr_bufferSize) @@ -2859,27 +2885,29 @@ ORMQRF_LAUNCHER_SCRATCH(double, cusolverDnDormqr_bufferSize) #undef ORMQRF_LAUNCHER_SCRATCH template -inline void ormtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ormtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, n, - nullptr, lda, nullptr, nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t lda, std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, 
queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_fill_mode(uplo), get_cublas_operation(trans), + m, n, nullptr, lda, nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define ORMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ - std::int64_t ormtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ - oneapi::math::uplo uplo, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t lda, \ - std::int64_t ldc) { \ + std::int64_t ormtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ int scratch_size; \ ormtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, \ lda, ldc, &scratch_size); \ @@ -2892,27 +2920,29 @@ ORMTR_LAUNCHER_SCRATCH(double, cusolverDnDormtr_bufferSize) #undef ORMTR_LAUNCHER_SCRATCH template -inline void potrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void potrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, scratch_size); - }); - }).wait(); -} - -#define POTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + 
CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, scratch_size); + }); + }) + .wait(); +} + +#define POTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t potrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - potrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + potrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } POTRF_LAUNCHER_SCRATCH(float, cusolverDnSpotrf_bufferSize) @@ -2925,7 +2955,7 @@ POTRF_LAUNCHER_SCRATCH(std::complex, cusolverDnZpotrf_bufferSize) // cusolverDnXpotrs does not use scratchpad memory #define POTRS_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t potrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t potrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ std::int64_t ldb) { \ return 0; \ @@ -2939,27 +2969,29 @@ POTRS_LAUNCHER_SCRATCH(std::complex) #undef POTRS_LAUNCHER_SCRATCH template -inline void potri_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void potri_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, scratch_size); - }); - }).wait(); -} - -#define POTRI_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + 
.submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, scratch_size); + }); + }) + .wait(); +} + +#define POTRI_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t potri_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - potri_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + potri_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } POTRI_LAUNCHER_SCRATCH(float, cusolverDnSpotri_bufferSize) @@ -2970,26 +3002,29 @@ POTRI_LAUNCHER_SCRATCH(std::complex, cusolverDnZpotri_bufferSize) #undef POTRI_LAUNCHER_SCRATCH template -inline void sytrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void sytrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, nullptr, lda, scratch_size); - }); - }).wait(); -} - -#define SYTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, nullptr, lda, + scratch_size); + }); + 
}) + .wait(); +} + +#define SYTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t sytrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - sytrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + sytrf_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } SYTRF_LAUNCHER_SCRATCH(float, cusolverDnSsytrf_bufferSize) @@ -3000,24 +3035,26 @@ SYTRF_LAUNCHER_SCRATCH(std::complex, cusolverDnZsytrf_bufferSize) #undef SYTRF_LAUNCHER_SCRATCH template -inline void syevd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void syevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), - get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), + get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, + scratch_size); + }); + }) + .wait(); } #define SYEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ - std::int64_t syevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, \ 
+ std::int64_t syevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, \ std::int64_t lda) { \ int scratch_size; \ syevd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, lda, \ @@ -3031,25 +3068,27 @@ SYEVD_LAUNCHER_SCRATCH(double, cusolverDnDsyevd_bufferSize) #undef SYEVD_LAUNCHER_SCRATCH template -inline void sygvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda, std::int64_t ldb, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), - get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, nullptr, - lda, nullptr, ldb, nullptr, scratch_size); - }); - }).wait(); +inline void sygvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), + get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, + nullptr, lda, nullptr, ldb, nullptr, scratch_size); + }); + }) + .wait(); } #define SYGVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t sygvd_scratchpad_size(sycl::queue & queue, std::int64_t itype, \ - oneapi::math::job jobz, oneapi::math::uplo uplo, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ int scratch_size; \ 
sygvd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, \ @@ -3063,27 +3102,30 @@ SYGVD_LAUNCHER_SCRATCH(double, cusolverDnDsygvd_bufferSize) #undef SYGVD_LAUNCHER_SCRATCH template -inline void sytrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void sytrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, nullptr, nullptr, scratch_size); - }); - }).wait(); -} - -#define SYTRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, nullptr, nullptr, + scratch_size); + }); + }) + .wait(); +} + +#define SYTRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t sytrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - sytrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + sytrd_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } SYTRD_LAUNCHER_SCRATCH(float, cusolverDnSsytrd_bufferSize) @@ -3092,21 +3134,21 @@ SYTRD_LAUNCHER_SCRATCH(double, cusolverDnDsytrd_bufferSize) #undef 
SYTRD_LAUNCHER_SCRATCH template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -3114,7 +3156,8 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -3123,28 +3166,30 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, one } template -inline void ungbr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungbr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = 
sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); -} - -#define UNGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + std::int64_t k, std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), + m, n, k, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); +} + +#define UNGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t ungbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ - std::int64_t m, std::int64_t n, std::int64_t k, \ - std::int64_t lda) { \ - int scratch_size; \ - ungbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \ - &scratch_size); \ - return scratch_size; \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + int scratch_size; \ + ungbr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, lda, \ + &scratch_size); \ + return scratch_size; \ } UNGBR_LAUNCHER_SCRATCH(std::complex, cusolverDnCungbr_bufferSize) @@ -3153,17 +3198,19 @@ UNGBR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungbr_bufferSize) #undef UNGBR_LAUNCHER_SCRATCH template -inline void ungqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, 
func, err, handle, m, n, k, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, + nullptr, scratch_size); + }); + }) + .wait(); } #define UNGQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3182,27 +3229,29 @@ UNGQR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungqr_bufferSize) #undef UNGQR_LAUNCHER_SCRATCH template -inline void ungtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); -} - -#define UNGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); +} + +#define UNGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ std::int64_t ungtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - int scratch_size; \ - ungtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ - &scratch_size); \ - return scratch_size; 
\ + std::int64_t n, std::int64_t lda) { \ + int scratch_size; \ + ungtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, lda, \ + &scratch_size); \ + return scratch_size; \ } UNGTR_LAUNCHER_SCRATCH(std::complex, cusolverDnCungtr_bufferSize) @@ -3211,7 +3260,7 @@ UNGTR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungtr_bufferSize) #undef UNGTR_LAUNCHER_SCRATCH template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -3219,39 +3268,39 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "unmrq_scratchpad_size"); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { +std::int64_t unmrq_scratchpad_size>( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "unmrq_scratchpad_size"); } template -inline void unmqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void unmqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_operation(trans), m, n, k, nullptr, lda, nullptr, - nullptr, ldc, scratch_size); - }); - 
}).wait(); -} - -#define UNMQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t unmqr_scratchpad_size( \ - sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, \ - std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ - int scratch_size; \ - unmqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \ - lda, ldc, &scratch_size); \ - return scratch_size; \ + std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_operation(trans), m, n, k, nullptr, lda, + nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); +} + +#define UNMQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t unmqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + int scratch_size; \ + unmqr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, \ + lda, ldc, &scratch_size); \ + return scratch_size; \ } UNMQR_LAUNCHER_SCRATCH(std::complex, cusolverDnCunmqr_bufferSize) @@ -3260,27 +3309,29 @@ UNMQR_LAUNCHER_SCRATCH(std::complex, cusolverDnZunmqr_bufferSize) #undef UNMQR_LAUNCHER_SCRATCH template -inline void unmtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void unmtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemath_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, n, - nullptr, lda, nullptr, nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t lda, std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemath_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_fill_mode(uplo), get_cublas_operation(trans), + m, n, nullptr, lda, nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define UNMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ - std::int64_t unmtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ - oneapi::math::uplo uplo, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t lda, \ - std::int64_t ldc) { \ + std::int64_t unmtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ int scratch_size; \ unmtr_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, \ lda, ldc, &scratch_size); \ diff --git a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp index 67175ad8d..af0881c10 100644 --- a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp +++ b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp @@ -44,7 +44,7 @@ thread_local cusolver_handle CusolverScopedContextHandler::handle_he #endif CusolverScopedContextHandler::CusolverScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : ih(ih), 
needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -74,8 +74,8 @@ CusolverScopedContextHandler::~CusolverScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -93,7 +93,7 @@ void ContextCallback(void *userData) { } } -cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue &queue) { +cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue& queue) { auto cudaDevice = ih.get_native_device(); CUresult cuErr; CUcontext desired; @@ -140,10 +140,10 @@ cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue &q return handle; } -CUstream CusolverScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CusolverScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context CusolverScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context CusolverScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp index b552b4af2..d6443b41a 100644 --- a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp +++ b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp @@ -89,19 +89,19 @@ cuSolver handle to the SYCL context. 
class CusolverScopedContextHandler { CUcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMATH_PI_INTERFACE_REMOVED static thread_local cusolver_handle handle_helper; #else static thread_local cusolver_handle handle_helper; #endif - CUstream get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + CUstream get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - CusolverScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CusolverScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~CusolverScopedContextHandler() noexcept(false); /** @@ -111,7 +111,7 @@ class CusolverScopedContextHandler { * @param queue sycl queue. * @return cusolverDnHandle_t a handle to construct cusolver routines */ - cusolverDnHandle_t get_handle(const sycl::queue &queue); + cusolverDnHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
template @@ -120,7 +120,7 @@ class CusolverScopedContextHandler { return reinterpret_cast(cudaPtr); } - void wait_stream(const sycl::queue &queue) { + void wait_stream(const sycl::queue& queue) { cuStreamSynchronize(get_stream(queue)); } }; diff --git a/src/lapack/backends/cusolver/cusolver_task.hpp b/src/lapack/backends/cusolver/cusolver_task.hpp index 497b29052..802230c9a 100644 --- a/src/lapack/backends/cusolver/cusolver_task.hpp +++ b/src/lapack/backends/cusolver/cusolver_task.hpp @@ -49,9 +49,9 @@ namespace lapack { namespace cusolver { template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -61,7 +61,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } template -static inline void onemath_cusolver_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemath_cusolver_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/lapack/backends/mkl_common/mkl_lapack.cxx b/src/lapack/backends/mkl_common/mkl_lapack.cxx index a0c85f82b..6c440b1c6 100644 --- a/src/lapack/backends/mkl_common/mkl_lapack.cxx +++ b/src/lapack/backends/mkl_common/mkl_lapack.cxx @@ -17,2495 +17,2817 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, sycl::buffer> &taup, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, 
taup, scratchpad, - scratchpad_size)); -} -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size)); +} +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size)); } -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size)); } -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, 
sycl::buffer> &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size)); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); -} -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); -} -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, + 
RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); +} +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); +} +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void 
geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size)); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void 
getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); -} -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); -} -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size)); -} -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void getri(sycl::queue& queue, 
std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size)); +} +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size)); } -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size)); -} -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size)); +} +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size)); -} -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size)); -} -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size)); -} -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - 
sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size)); -} -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, - scratchpad_size)); -} -void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, scratchpad_size)); -} -void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, scratchpad_size)); -} -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - 
scratchpad_size)); -} -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size)); -} -void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); -} -void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); -} -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); -} -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size)); +} +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + 
std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); -} -void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size)); -} -void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size)); -} -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); -} -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); -} -void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd( + queue, 
detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size)); +} +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); -} -void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); -} -void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void ormrq(sycl::queue 
&queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, 
sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - 
RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size)); -} -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); -} -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); -} -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); -} -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); -} -void syevd(sycl::queue 
&queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, scratchpad_size)); -} -void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, scratchpad_size)); -} -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size)); -} -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size)); +} +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + 
RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size)); +} +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size)); +} +void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), + detail::get_onemkl_uplo(uplo), n, a, lda, + w, scratchpad, scratchpad_size)); +} +void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), + detail::get_onemkl_uplo(uplo), n, a, lda, + w, scratchpad, scratchpad_size)); +} +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size)); +} +void hegvd(sycl::queue& queue, 
std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hegvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size)); +} +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); +} +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrd( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); +} +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size)); -} -void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, 
sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::hetrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), + m, n, k, a, lda, tau, scratchpad, + scratchpad_size)); +} +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), + m, n, k, a, lda, tau, scratchpad, + scratchpad_size)); +} +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); +} +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); +} +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, 
scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); } -void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); -} -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgtr( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); +} +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size)); +} +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, 
a, lda, tau, c, ldc, scratchpad, + scratchpad_size)); +} +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ormqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, 
scratchpad_size)); +} +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t 
lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, + a, lda, scratchpad, scratchpad_size)); +} +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size)); +} +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size)); +} +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, + nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size)); +} +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, 
+ nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size)); +} +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), + detail::get_onemkl_uplo(uplo), n, a, lda, + w, scratchpad, scratchpad_size)); +} +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), + detail::get_onemkl_uplo(uplo), n, a, lda, + w, scratchpad, scratchpad_size)); +} +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size)); } -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, 
scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sygvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size)); } -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); -} -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size)); -} -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size)); -} -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size)); -} -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size)); -} -void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrd( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, scratchpad_size)); } 
-void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); -} -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); +} +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::sytrf( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size)); } -void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); +} +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); +} +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); +} +void 
trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size)); +} +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), + m, n, k, a, lda, tau, scratchpad, + scratchpad_size)); } -void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), + m, n, k, a, lda, tau, scratchpad, + scratchpad_size)); } -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void 
ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); } -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size)); } -void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr( + 
queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); } -void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungtr( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size)); +} +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void unmqr(sycl::queue& queue, 
oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size)); +} +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size)); } -void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo 
uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, - scratchpad_size)); -} -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tauq, double *taup, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tauq, float *taup, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const 
std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, 
double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, - 
dependencies)); -} -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(queue, 
detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - float *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, 
std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrf(sycl::queue 
&queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), n, a, lda, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event syevd(sycl::queue &queue, 
oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, ldb, w, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), n, a, lda, d, e, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), n, a, lda, ipiv, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::unmtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, + scratchpad_size)); +} +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd( + queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, 
dependencies)); +} +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf( + queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf( + queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf( + queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf( + queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, 
double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + 
std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, + ipiv, b, ldb, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, 
std::int64_t lda, + float* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies)); +} +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, a, lda, s, + u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies)); +} +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, 
std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies)); +} +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies)); +} +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, d, e, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, d, e, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, + 
dependencies)); +} +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr( + queue, detail::get_onemkl_side(side), 
detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potri(sycl::queue& queue, oneapi::math::uplo 
uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), + n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), + n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + 
std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), + n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs(queue, detail::get_onemkl_uplo(uplo), + n, nrhs, a, lda, b, ldb, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, w, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd( + queue, itype, 
detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies)); +} +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies)); +} +sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, d, e, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, d, e, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, 
const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, - scratchpad_size, 
dependencies)); -} -sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), n, a, lda, tau, scratchpad, scratchpad_size, - 
dependencies)); -} -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, 
std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungbr(queue, detail::get_onemkl_generate(vec), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr( + queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr( + queue, m, n, k, a, lda, tau, 
scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr(queue, detail::get_onemkl_uplo(uplo), + n, a, lda, tau, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event unmqr(sycl::queue& queue, 
oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, dependencies)); +} +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr( + queue, 
detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies)); +} +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, - scratchpad, scratchpad_size, dependencies)); -} -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies)); +} +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size)); } 
-void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size)); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size)); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, 
std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::geqrf_batch( + queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size)); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getri_batch(sycl::queue 
&queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getri_batch( + queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, sycl::buffer &b, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, - b, ldb, stride_b, batch_size, scratchpad, scratchpad_size)); -} -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size)); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, - b, ldb, stride_b, batch_size, scratchpad, scratchpad_size)); -} -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, 
scratchpad, scratchpad_size)); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, - b, ldb, stride_b, batch_size, scratchpad, scratchpad_size)); -} -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size)); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, - b, ldb, stride_b, batch_size, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size)); } -void getrf_batch(sycl::queue &queue, 
std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); -} -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); +} +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t 
batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, - scratchpad, scratchpad_size)); -} -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size)); -} -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size)); -} -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, 
std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, - scratchpad_size)); -} -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::getrf_batch( + queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size)); +} +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, + scratchpad, scratchpad_size)); +} +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, + scratchpad, scratchpad_size)); +} +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + 
::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, + stride_a, batch_size, scratchpad, scratchpad_size)); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, + stride_a, batch_size, scratchpad, scratchpad_size)); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, - scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, + stride_a, batch_size, scratchpad, scratchpad_size)); } -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size)); -} -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS( + ::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, + stride_a, batch_size, scratchpad, scratchpad_size)); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size)); -} -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size)); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + 
std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size)); -} -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size)); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size)); -} -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size)); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
- RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size)); -} -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size)); +} +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, + scratchpad, scratchpad_size)); +} +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size)); + RETHROW_ONEMKL_EXCEPTIONS(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, + scratchpad, scratchpad_size)); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, 
float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, 
stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - 
std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); 
+} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, 
group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t 
group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + 
::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t 
*lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, 
detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event 
getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, - stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size, dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, 
std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, - group_count, group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, - group_count, group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, - group_count, group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch(queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, - group_count, group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, + ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count, + group_sizes, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count, + group_sizes, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* 
lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count, + group_sizes, scratchpad, scratchpad_size, dependencies)); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch( + queue, detail::get_onemkl_transpose(trans), n, nrhs, a, lda, ipiv, b, ldb, group_count, + group_sizes, scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr_batch(sycl::queue& queue, 
std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + 
::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size, dependencies)); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size, dependencies)); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - 
std::int64_t batch_size, std::complex *scratchpad, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, stride_a, batch_size, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies)); +} 
+sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch(queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - 
std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch( + queue, detail::get_onemkl_uplo(uplo), n, a, lda, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event 
potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, - stride_b, batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, stride_a, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* 
ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch(queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, - group_sizes, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungqr_batch(sycl::queue &queue, 
std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, - batch_size, scratchpad, scratchpad_size, - dependencies)); -} -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); -} -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies)); + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, a, lda, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, + batch_size, scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + 
RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies)); } template <> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, 
std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu, - ldvt)); +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, std::int64_t lda, std::int64_t ldu, + std::int64_t ldvt) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu, + ldvt)); } template <> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, 
std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu, - ldvt)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu, + ldvt)); } template <> -std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, +std::int64_t gesvd_scratchpad_size>(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size>(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, - n, lda, ldu, ldvt)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size>( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, ldu, + ldvt)); } template <> -std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, +std::int64_t gesvd_scratchpad_size>(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::gesvd_scratchpad_size>(queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, - n, lda, ldu, ldvt)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::gesvd_scratchpad_size>( + queue, detail::get_onemkl_jobsvd(jobu), detail::get_onemkl_jobsvd(jobvt), m, n, lda, + ldu, ldvt)); } template <> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda)); } template <> -std::int64_t getrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t getrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda)); } template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda)); +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda)); } template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda)); +std::int64_t 
getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda)); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda)); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda)); } template <> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size(queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb)); } template <> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size(queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size( + queue, detail::get_onemkl_transpose(trans), n, 
nrhs, lda, ldb)); } template <> -std::int64_t getrs_scratchpad_size>(sycl::queue &queue, +std::int64_t getrs_scratchpad_size>(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size>(queue, detail::get_onemkl_transpose(trans), n, nrhs, - lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb)); } template <> -std::int64_t getrs_scratchpad_size>(sycl::queue &queue, +std::int64_t getrs_scratchpad_size>(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_scratchpad_size>(queue, detail::get_onemkl_transpose(trans), n, nrhs, - lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb)); } template <> -std::int64_t heevd_scratchpad_size>(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd_scratchpad_size>(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, - lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd_scratchpad_size>( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t heevd_scratchpad_size>(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::heevd_scratchpad_size>(queue, detail::get_onemkl_job(jobz), 
detail::get_onemkl_uplo(uplo), n, - lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::heevd_scratchpad_size>( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std::int64_t itype, +std::int64_t hegvd_scratchpad_size>(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd_scratchpad_size>(queue, itype, detail::get_onemkl_job(jobz), - detail::get_onemkl_uplo(uplo), n, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd_scratchpad_size>( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb)); } template <> -std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std::int64_t itype, +std::int64_t hegvd_scratchpad_size>(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hegvd_scratchpad_size>(queue, itype, detail::get_onemkl_job(jobz), - detail::get_onemkl_uplo(uplo), n, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::hegvd_scratchpad_size>( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, + ldb)); } template <> -std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, 
- std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrd_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::hetrd_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::hetrf_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::hetrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size(queue, detail::get_onemkl_generate(vect), m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size( + queue, detail::get_onemkl_generate(vect), m, n, k, lda)); } template <> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, 
oneapi::math::generate vect, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size(queue, detail::get_onemkl_generate(vect), m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgbr_scratchpad_size( + queue, detail::get_onemkl_generate(vect), m, n, k, lda)); } template <> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgtr_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda)); } template <> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda)); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormrq_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, 
k, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormqr_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, lda, ldc)); } template <> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), 
detail::get_onemkl_transpose(trans), m, n, lda, - ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ormtr_scratchpad_size( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, lda, ldc)); } template <> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t potrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo 
uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); } template <> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); } template <> -std::int64_t potrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, nrhs, - lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); } template <> -std::int64_t potrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, nrhs, - lda, ldb)); +std::int64_t potrs_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrs_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb)); } template <> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potri_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t potri_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potri_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t potri_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potri_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrf_scratchpad_size>(queue, 
detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::sytrf_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size(queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::syevd_scratchpad_size( + queue, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, - ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb)); 
} template <> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size(queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, - ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sygvd_scratchpad_size( + queue, itype, detail::get_onemkl_job(jobz), detail::get_onemkl_uplo(uplo), n, lda, ldb)); } template <> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::sytrd_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), 
detail::get_onemkl_diag(diag), n, nrhs, - lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, - lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::trtrs_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::trtrs_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), + detail::get_onemkl_diag(diag), n, nrhs, lda, ldb)); } template <> -std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, +std::int64_t ungbr_scratchpad_size>(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr_scratchpad_size>(queue, detail::get_onemkl_generate(vect), m, n, k, - lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr_scratchpad_size>( + queue, detail::get_onemkl_generate(vect), m, n, k, lda)); } template <> -std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungbr_scratchpad_size>(queue, detail::get_onemkl_generate(vect), m, n, k, - lda)); +std::int64_t ungbr_scratchpad_size>(sycl::queue& queue, + oneapi::math::generate vect, + std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungbr_scratchpad_size>( + queue, detail::get_onemkl_generate(vect), m, n, k, lda)); } template <> -std::int64_t ungqr_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda)); } template <> -std::int64_t 
ungqr_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda)); } template <> -std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungtr_scratchpad_size>(queue, detail::get_onemkl_uplo(uplo), n, lda)); +std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungtr_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda)); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq_scratchpad_size>(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, - n, k, lda, ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), 
m, n, k, lda, + ldc)); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmrq_scratchpad_size>(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, - n, k, lda, ldc)); +std::int64_t unmrq_scratchpad_size>( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::unmrq_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmqr_scratchpad_size>(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr_scratchpad_size>(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, - n, k, lda, ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmqr_scratchpad_size>(queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, - n, k, lda, ldc)); +std::int64_t unmqr_scratchpad_size>( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t 
n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::unmqr_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_transpose(trans), m, n, k, lda, + ldc)); } template <> -std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmtr_scratchpad_size>(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr_scratchpad_size>(queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), - detail::get_onemkl_transpose(trans), m, n, lda, ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, lda, ldc)); } template <> -std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmtr_scratchpad_size>(sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::unmtr_scratchpad_size>( - queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), detail::get_onemkl_transpose(trans), m, n, lda, ldc)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::unmtr_scratchpad_size>( + queue, detail::get_onemkl_side(side), detail::get_onemkl_uplo(uplo), + detail::get_onemkl_transpose(trans), m, n, lda, ldc)); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, stride_a, - stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size( + queue, m, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, stride_a, - stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size( + queue, m, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( - queue, m, n, lda, stride_a, stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( + queue, m, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( - queue, m, n, lda, stride_a, stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( + queue, m, n, lda, stride_a, stride_ipiv, batch_size)); } 
template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, stride_a, - stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size( + queue, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, stride_a, - stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size( + queue, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size>( - queue, n, lda, stride_a, stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( + queue, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size>( - queue, n, lda, 
stride_a, stride_ipiv, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( + queue, n, lda, stride_a, stride_ipiv, batch_size)); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size( - queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)); + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, + stride_b, batch_size)); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size( - queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)); + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, + stride_b, batch_size)); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( - queue, 
detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, + stride_b, batch_size)); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( - queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, stride_a, stride_ipiv, ldb, + stride_b, batch_size)); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, stride_a, - stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size( + queue, m, n, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, stride_a, - stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size( + queue, m, n, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( - queue, m, n, lda, stride_a, stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( + queue, m, n, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( - queue, m, n, lda, stride_a, stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( + queue, m, n, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, - batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); } 
template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, - batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda, stride_a, batch_size)); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, 
std::int64_t stride_b, std::int64_t batch_size) { @@ -2513,7 +2835,7 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size)); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, @@ -2523,271 +2845,295 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mat } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size)); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, + batch_size)); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, batch_size)); + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, 
std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, stride_a, ldb, stride_b, + batch_size)); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, stride_a, - stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size( + queue, m, n, k, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, stride_a, - stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size( + queue, m, n, k, lda, stride_a, stride_tau, batch_size)); } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( - queue, m, n, k, lda, stride_a, stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( + queue, m, n, 
k, lda, stride_a, stride_tau, batch_size)); } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( - queue, m, n, k, lda, stride_a, stride_tau, batch_size)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( + queue, m, n, k, lda, stride_a, stride_tau, batch_size)); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, group_count, - group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, group_count, - group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t 
getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( - queue, m, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( - queue, m, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, group_count, - group_sizes)); +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size( + queue, n, lda, group_count, group_sizes)); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, group_count, - group_sizes)); +std::int64_t 
getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size( + queue, n, lda, group_count, group_sizes)); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size>( - queue, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( + queue, n, lda, group_count, group_sizes)); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getri_batch_scratchpad_size>( - queue, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( + queue, n, lda, group_count, group_sizes)); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size(queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, - ldb, group_count, group_sizes)); 
+ std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size( queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( - queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, group_sizes)); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, + group_sizes)); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( - queue, detail::get_onemkl_transpose(trans), n, 
nrhs, lda, ldb, group_count, group_sizes)); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( + queue, detail::get_onemkl_transpose(trans), n, nrhs, lda, ldb, group_count, + group_sizes)); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, group_count, - group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, group_count, - group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( - queue, m, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( - queue, m, n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( + queue, m, n, lda, group_count, group_sizes)); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::int64_t *lda, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, - group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size( + queue, m, n, k, lda, group_count, group_sizes)); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, - group_count, group_sizes)); +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, 
+ std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::orgqr_batch_scratchpad_size( + queue, m, n, k, lda, group_count, group_sizes)); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda, - group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, lda, - group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( - queue, 
detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, - oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, lda, group_count, group_sizes)); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size(queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, - group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + 
std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size( queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( - queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( + queue, detail::get_onemkl_uplo(uplo), n, nrhs, lda, ldb, group_count, group_sizes)); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - 
std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( - queue, m, n, k, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( + queue, m, n, k, lda, group_count, group_sizes)); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { - RETHROW_ONEMKL_EXCEPTIONS_RET(::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( - queue, m, n, k, lda, group_count, group_sizes)); + std::int64_t* group_sizes) { + RETHROW_ONEMKL_EXCEPTIONS_RET( + ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( + queue, m, n, k, lda, group_count, group_sizes)); } diff --git a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp b/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp index eee9e42e0..c4d03f26b 100644 --- a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp +++ b/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp @@ -29,1235 +29,1239 @@ namespace oneapi { namespace math { namespace lapack { -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, sycl::buffer> &taup, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& 
queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void 
gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t 
scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + 
sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t 
scratchpad_size); -void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, 
sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void heevd(sycl::queue& queue, 
oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, 
std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, 
std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void 
syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void 
potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& 
b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void 
sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t 
m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, 
std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - 
std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + 
sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer 
&ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, sycl::buffer &b, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + 
std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, 
std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void 
potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, 
std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tauq, double *taup, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tauq, float *taup, float *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, 
const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - float *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, 
std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event 
ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, 
std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, 
oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue 
&queue, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +sycl::event 
gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, 
std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t 
m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + float* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}); +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo 
uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, 
oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, 
+ float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event 
sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies 
= {}); +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, 
std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies 
= {}); +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + 
std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t 
*group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, 
std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event 
getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + 
std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double 
*scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); 
-sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); 
-sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event 
potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* 
group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t 
stride_a, std::complex *b, std::int64_t ldb, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, 
+ const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies 
= {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::math::jobsvd jobu, 
+std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda); +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t 
orgbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t 
potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::math::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::math::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t 
ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue 
&queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t stride_a, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, 
std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* 
group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); } // namespace lapack } // namespace math diff --git a/src/lapack/backends/rocsolver/rocsolver_batch.cpp b/src/lapack/backends/rocsolver/rocsolver_batch.cpp index ae21f70af..a84be1fe6 100644 --- a/src/lapack/backends/rocsolver/rocsolver_batch.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_batch.cpp @@ -31,476 +31,477 @@ namespace rocsolver { // BATCH BUFFER API -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, 
sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer 
&scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t 
stride_ipiv, sycl::buffer &b, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, 
oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void 
getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "orgqr_batch"); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { throw unimplemented("lapack", "orgqr_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrs_batch(sycl::queue 
&queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t 
stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ungqr_batch"); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ungqr_batch"); } // BATCH USM API -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* 
tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", 
"geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw 
unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t 
stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** 
ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, 
std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, 
std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, 
std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, 
std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, float *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, double *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { throw unimplemented("lapack", "potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; int64_t batch_size = 0; @@ -509,27 +510,27 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu batch_size += group_sizes[i]; } - int *info = (int *)malloc_device(sizeof(int) * batch_size, queue); - T **a_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); + int* info = (int*)malloc_device(sizeof(int) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_cpy); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto *info_ = reinterpret_cast(info); - rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo[i]), - (int)n[i], a_ + offset, (int)lda[i], info_ + offset, - (int)group_sizes[i]); + auto** a_ = reinterpret_cast(a_dev); + auto* info_ = reinterpret_cast(info); + rocsolver_native_named_func(func_name, func, err, handle, + get_rocblas_fill_mode(uplo[i]), (int)n[i], a_ + offset, + (int)lda[i], info_ + offset, (int)group_sizes[i]); offset += group_sizes[i]; } }); @@ -538,13 +539,13 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu } // Scratchpad memory not needed as parts of buffer a is used as workspace memory -#define POTRF_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potrf_batch( \ - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ - return potrf_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, \ - group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ +#define POTRF_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ + sycl::event potrf_batch( \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, TYPE** a, \ + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ + return potrf_batch(#ROCSOLVER_ROUTINE, 
ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, \ + group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } POTRF_BATCH_LAUNCHER_USM(float, rocsolver_spotrf_batched) @@ -554,44 +555,44 @@ POTRF_BATCH_LAUNCHER_USM(std::complex, rocsolver_zpotrf_batched) #undef POTRF_BATCH_LAUNCHER_USM -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t 
nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, T **a, - std::int64_t *lda, T **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a, + std::int64_t* lda, T** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; int64_t batch_size = 0; @@ -605,31 +606,32 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu "rocsolver potrs_batch only supports nrhs = 1"); } - T **a_dev = (T 
**)malloc_device(sizeof(T *) * batch_size, queue); - T **b_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); + T** b_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy_a = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); auto done_cpy_b = - queue.submit([&](sycl::handler &h) { h.memcpy(b_dev, b, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(b_dev, b, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_cpy_a); cgh.depends_on(done_cpy_b); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto **b_ = reinterpret_cast(b_dev); - rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo[i]), - (int)n[i], (int)nrhs[i], a_ + offset, (int)lda[i], - b_ + offset, (int)ldb[i], (int)group_sizes[i]); + auto** a_ = reinterpret_cast(a_dev); + auto** b_ = reinterpret_cast(b_dev); + rocsolver_native_named_func(func_name, func, err, handle, + get_rocblas_fill_mode(uplo[i]), (int)n[i], (int)nrhs[i], + a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], + (int)group_sizes[i]); offset += group_sizes[i]; } }); @@ -640,10 +642,10 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define 
POTRS_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, TYPE **b, std::int64_t *ldb, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, \ ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ dependencies); \ @@ -656,52 +658,52 @@ POTRS_BATCH_LAUNCHER_USM(std::complex, rocsolver_zpotrs_batched) #undef POTRS_BATCH_LAUNCHER_USM -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& 
queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } // BATCH SCRATCHPAD API template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", 
"getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -709,7 +711,7 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -717,20 +719,20 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, 
std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -738,7 +740,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -746,7 +748,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, @@ -754,7 +756,7 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, @@ -763,32 +765,32 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mat } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t 
getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -796,7 +798,7 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -805,19 +807,19 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queu } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t 
potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -825,7 +827,7 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -833,14 +835,14 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, @@ -849,25 
+851,27 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mat } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, - std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -875,148 +879,148 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_ } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t 
getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t 
getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, 
std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::int64_t *lda, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t 
group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } // rocsolverDnXpotrfBatched does not use scratchpad memory -#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrf_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ - return 0; \ +#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + return 0; \ } POTRF_GROUP_LAUNCHER_SCRATCH(float) @@ -1027,13 +1031,13 @@ POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRF_GROUP_LAUNCHER_SCRATCH // rocsolverDnXpotrsBatched does not use scratchpad memory -#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t * n, std::int64_t * nrhs, \ - std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } POTRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1044,19 +1048,19 @@ POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRS_GROUP_LAUNCHER_SCRATCH template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, 
- std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } diff --git a/src/lapack/backends/rocsolver/rocsolver_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_handle.hpp index 3e1a14b34..4828c0254 100644 --- a/src/lapack/backends/rocsolver/rocsolver_handle.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_handle.hpp @@ -30,10 +30,10 @@ namespace rocsolver { template struct rocsolver_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocsolver_handle_mapper_{}; ~rocsolver_handle() noexcept(false) { - for (auto &handle_pair : rocsolver_handle_mapper_) { + for (auto& handle_pair : rocsolver_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/lapack/backends/rocsolver/rocsolver_helper.hpp b/src/lapack/backends/rocsolver/rocsolver_helper.hpp index 36464f2e6..5d4e6e821 100644 --- a/src/lapack/backends/rocsolver/rocsolver_helper.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_helper.hpp @@ -81,7 +81,7 @@ void overflow_check(Index index, Next... 
indices) { class rocsolver_error : virtual public std::runtime_error { protected: - inline const char *rocsolver_error_map(rocblas_status error) { + inline const char* rocsolver_error_map(rocblas_status error) { return rocblas_status_to_string(error); } @@ -111,7 +111,7 @@ class rocsolver_error : virtual public std::runtime_error { class hip_error : virtual public std::runtime_error { protected: - inline const char *hip_error_map(hipError_t result) { + inline const char* hip_error_map(hipError_t result) { return hipGetErrorName(result); } int error_number; ///< error number @@ -167,9 +167,8 @@ class hip_error : virtual public std::runtime_error { HIP_ERROR_FUNC(hipStreamSynchronize, hip_err, currentStreamId); template -inline void rocsolver_native_named_func(const char *func_name, Func func, - rocsolver_status err, - rocsolver_handle handle, Types... args){ +inline void rocsolver_native_named_func(const char* func_name, Func func, rocsolver_status err, + rocsolver_handle handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND ROCSOLVER_ERROR_FUNC_T(func_name, func, err, handle, args...) 
#else @@ -258,12 +257,12 @@ struct RocmEquivalentType> { /* devinfo */ -inline int get_rocsolver_devinfo(sycl::queue &queue, sycl::buffer &devInfo) { +inline int get_rocsolver_devinfo(sycl::queue& queue, sycl::buffer& devInfo) { sycl::host_accessor dev_info_{ devInfo }; return dev_info_[0]; } -inline int get_rocsolver_devinfo(sycl::queue &queue, const int *devInfo) { +inline int get_rocsolver_devinfo(sycl::queue& queue, const int* devInfo) { int dev_info_; queue.memcpy(&dev_info_, devInfo, sizeof(int)); queue.wait(); @@ -271,8 +270,8 @@ inline int get_rocsolver_devinfo(sycl::queue &queue, const int *devInfo) { } template -inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char *func_name, - const char *cufunc_name) { +inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char* func_name, + const char* cufunc_name) { queue.wait(); const int devinfo_ = get_rocsolver_devinfo(queue, devinfo); if (devinfo_ > 0) diff --git a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp index ea7a61937..5b0c265b2 100644 --- a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp @@ -32,27 +32,27 @@ namespace rocsolver { // BUFFER APIs template -inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, 
scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tauq_acc = tauq.template get_access(cgh); auto taup_acc = taup.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tauq_ = sc.get_mem(tauq_acc); - auto taup_ = sc.get_mem(taup_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tauq_ = sc.get_mem(tauq_acc); + auto taup_ = sc.get_mem(taup_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_); @@ -61,10 +61,10 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEBRD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tauq, sycl::buffer &taup, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tauq, sycl::buffer& taup, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ gebrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size); \ } @@ -76,41 +76,41 @@ GEBRD_LAUNCHER(std::complex, double, rocsolver_zgebrd) #undef GEBRD_LAUNCHER -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void 
gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } template -inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { 
auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_); }); @@ -118,8 +118,8 @@ inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEQRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ geqrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -133,9 +133,9 @@ GEQRF_LAUNCHER(std::complex, rocsolver_zgeqrf) #undef GEQRF_LAUNCHER template -void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -146,15 +146,15 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ 1 }; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, ipiv32_, devInfo_); @@ -162,7 +162,7 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); @@ -174,8 +174,8 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, } #define GETRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ getrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -188,32 +188,32 @@ GETRF_LAUNCHER(std::complex, rocsolver_zgetrf) #undef GETRF_LAUNCHER -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + 
std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } template -inline void getrs(const char *func_name, Func func, sycl::queue &queue, +inline void getrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb); @@ -224,7 +224,7 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = ipiv.size(); sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -232,15 +232,15 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb); @@ -249,10 +249,10 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ getrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -265,30 +265,30 @@ GETRS_LAUNCHER(std::complex, rocsolver_zgetrs) #undef GETRS_LAUNCHER template -inline void gesvd(const char 
*func_name, Func func, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, m, lda, ldu, ldvt, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto s_acc = s.template get_access(cgh); auto u_acc = u.template get_access(cgh); auto vt_acc = vt.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto s_ = sc.get_mem(s_acc); - auto u_ = sc.get_mem(u_acc); - auto vt_ = sc.get_mem(vt_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto s_ = sc.get_mem(s_acc); + auto u_ = sc.get_mem(u_acc); + auto vt_ = sc.get_mem(vt_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_jobsvd(jobu), get_rocsolver_jobsvd(jobvt), m, n, a_, lda, s_, u_, ldu, @@ -300,10 +300,10 @@ 
inline void gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define GESVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, \ - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, \ + void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, \ + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ gesvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, \ vt, ldvt, scratchpad, scratchpad_size); \ @@ -317,25 +317,25 @@ GESVD_LAUNCHER(std::complex, double, rocsolver_zgesvd) #undef GESVD_LAUNCHER template -inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { 
+ onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -345,12 +345,12 @@ inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ - scratchpad_size); \ +#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ + void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ + scratchpad_size); \ } HEEVD_LAUNCHER(std::complex, float, rocsolver_cheevd) @@ -359,28 +359,28 @@ HEEVD_LAUNCHER(std::complex, double, rocsolver_zheevd) #undef HEEVD_LAUNCHER template -inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, +inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, 
std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -390,13 +390,13 @@ inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int lapack_info_check(queue, devInfo, __func__, func_name); } -#define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, 
itype, jobz, uplo, n, a, lda, b, ldb, \ - w, scratchpad, scratchpad_size); \ +#define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ + void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \ + w, scratchpad, scratchpad_size); \ } HEGVD_LAUNCHER(std::complex, float, rocsolver_chegvd) @@ -405,24 +405,24 @@ HEGVD_LAUNCHER(std::complex, double, rocsolver_zhegvd) #undef HEGVD_LAUNCHER template -inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = 
sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -431,10 +431,10 @@ inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HETRD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, \ - sycl::buffer &e, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, \ + sycl::buffer& e, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hetrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size); \ } @@ -444,32 +444,32 @@ HETRD_LAUNCHER(std::complex, double, rocsolver_zhetrd) #undef HETRD_LAUNCHER -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } -void hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } template -inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::generate vec, - 
std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -478,9 +478,9 @@ inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -491,18 +491,18 @@ ORGBR_LAUNCHER(double, rocsolver_dorgbr) #undef ORGBR_LAUNCHER template -inline void orgqr(const char *func_name, 
Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -510,9 +510,9 @@ inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define ORGQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -523,18 +523,18 @@ ORGQR_LAUNCHER(double, rocsolver_dorgqr) #undef ORGQR_LAUNCHER template -inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo 
uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -542,12 +542,12 @@ inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); } -#define ORGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ - scratchpad_size); \ +#define ORGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ + void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ + scratchpad_size); \ } ORGTR_LAUNCHER(float, rocsolver_sorgtr) @@ -556,22 +556,22 @@ 
ORGTR_LAUNCHER(double, rocsolver_dorgtr) #undef ORGTR_LAUNCHER template -inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -581,10 +581,10 @@ inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + void ormtr(sycl::queue& queue, oneapi::math::side 
side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ormtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, \ c, ldc, scratchpad, scratchpad_size); \ @@ -595,35 +595,35 @@ ORMTR_LAUNCHER(double, rocsolver_dormtr) #undef ORMTR_LAUNCHER -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } -void ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } template -inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - 
sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -633,10 +633,10 @@ inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ormqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, 
k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -647,19 +647,19 @@ ORMQR_LAUNCHER(double, rocsolver_dormqr) #undef ORMQR_LAUNCHER template -inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -668,11 +668,11 @@ inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define POTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ - scratchpad_size); \ +#define POTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ + void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& 
scratchpad, std::int64_t scratchpad_size) { \ + potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ + scratchpad_size); \ } POTRF_LAUNCHER(float, rocsolver_spotrf) @@ -683,19 +683,19 @@ POTRF_LAUNCHER(std::complex, rocsolver_zpotrf) #undef POTRF_LAUNCHER template -inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -704,11 +704,11 @@ inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi:: lapack_info_check(queue, devInfo, __func__, func_name); } -#define POTRI_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ - potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ - scratchpad_size); \ +#define POTRI_LAUNCHER(TYPE, 
ROCSOLVER_ROUTINE) \ + void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ + scratchpad_size); \ } POTRI_LAUNCHER(float, rocsolver_spotri) @@ -719,19 +719,19 @@ POTRI_LAUNCHER(std::complex, rocsolver_zpotri) #undef POTRI_LAUNCHER template -inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb); @@ -740,9 +740,9 @@ inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, \ - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -755,23 +755,23 @@ POTRS_LAUNCHER(std::complex, rocsolver_zpotrs) #undef POTRS_LAUNCHER template -inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, 
get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -782,9 +782,9 @@ inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYEVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ syevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ scratchpad_size); \ } @@ -795,26 +795,26 @@ SYEVD_LAUNCHER(double, rocsolver_dsyevd) #undef SYEVD_LAUNCHER template -inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -825,10 +825,10 @@ inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define SYGVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \ w, scratchpad, scratchpad_size); \ } @@ -839,23 +839,23 @@ SYGVD_LAUNCHER(double, rocsolver_dsygvd) #undef SYGVD_LAUNCH template -inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, 
sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -863,13 +863,13 @@ inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); } -#define SYTRD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ - scratchpad, scratchpad_size); \ +#define SYTRD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ + void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ + scratchpad, scratchpad_size); \ } SYTRD_LAUNCHER(float, 
rocsolver_ssytrd) @@ -878,9 +878,9 @@ SYTRD_LAUNCHER(double, rocsolver_dsytrd) #undef SYTRD_LAUNCHER template -inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); @@ -892,15 +892,15 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: std::uint64_t ipiv_size = n; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, ipiv32_, devInfo_); @@ -908,7 +908,7 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); @@ -920,8 
+920,8 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -934,47 +934,47 @@ SYTRF_LAUNCHER(std::complex, rocsolver_zsytrf) #undef SYTRF_LAUNCHER -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, 
std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } template -inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -983,9 +983,9 @@ inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -996,18 +996,18 @@ UNGBR_LAUNCHER(std::complex, rocsolver_zungbr) #undef UNGBR_LAUNCHER template -inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -1015,9 +1015,9 @@ inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define UNGQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1028,18 +1028,18 @@ UNGQR_LAUNCHER(std::complex, rocsolver_zungqr) #undef UNGQR_LAUNCHER template -inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -1047,12 +1047,12 @@ inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); } -#define UNGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ - std::int64_t scratchpad_size) { \ - ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ - scratchpad_size); \ +#define UNGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ + void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ + scratchpad_size); \ } UNGTR_LAUNCHER(std::complex, rocsolver_cungtr) @@ -1060,37 +1060,37 @@ UNGTR_LAUNCHER(std::complex, rocsolver_zungtr) #undef UNGTR_LAUNCHER -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } -void unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - 
std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } template -inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -1100,10 +1100,10 
@@ inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -1114,22 +1114,22 @@ UNMQR_LAUNCHER(std::complex, rocsolver_zunmqr) #undef UNMQR_LAUNCHER template -inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi::math::side side, +inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -1139,10 +1139,10 @@ inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ unmtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, \ c, ldc, scratchpad, scratchpad_size); \ @@ -1156,26 +1156,26 @@ UNMTR_LAUNCHER(std::complex, rocsolver_zunmtr) // USM APIs template -inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *d, T_B *e, T_A *tauq, - T_A *taup, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* d, T_B* e, T_A* tauq, + T_A* taup, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using 
rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tauq_ = reinterpret_cast(tauq); - auto taup_ = reinterpret_cast(taup); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tauq_ = reinterpret_cast(tauq); + auto taup_ = reinterpret_cast(taup); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_); @@ -1185,10 +1185,10 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s } #define GEBRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tauq, TYPE_A *taup, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tauq, TYPE_A* taup, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gebrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1200,43 +1200,43 @@ GEBRD_LAUNCHER_USM(std::complex, double, rocsolver_zgebrd) #undef GEBRD_LAUNCHER_USM -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, 
std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } template -inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, +inline sycl::event geqrf(const char* func_name, Func func, 
sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_); }); @@ -1245,9 +1245,9 @@ inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, s } #define GEQRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return geqrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1260,10 +1260,10 @@ GEQRF_LAUNCHER_USM(std::complex, rocsolver_zgeqrf) #undef GEQRF_LAUNCHER_USM template -inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, +inline sycl::event getrf(const char* 
func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -1271,19 +1271,19 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = std::min(n, m); - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto ipiv_ = reinterpret_cast(ipiv32); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, ipiv_, devInfo_); @@ -1291,7 +1291,7 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = 
static_cast(ipiv32[index]); @@ -1305,10 +1305,10 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s } #define GETRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ + sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1320,33 +1320,33 @@ GETRF_LAUNCHER_USM(std::complex, rocsolver_zgetrf) #undef GETRF_LAUNCHER_USM -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } template -inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t *ipiv, T *b, std::int64_t ldb, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); @@ -1354,25 +1354,25 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, // To get around the limitation. // Create new buffer and convert 64-bit values. 
std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_casting); - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb); @@ -1387,10 +1387,10 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event getrs(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t *ipiv, TYPE *b, \ - std::int64_t ldb, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \ + std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, 
trans, n, nrhs, a, lda, ipiv, \ b, ldb, scratchpad, scratchpad_size, dependencies); \ } @@ -1403,28 +1403,28 @@ GETRS_LAUNCHER_USM(std::complex, rocsolver_zgetrs) #undef GETRS_LAUNCHER_USM template -inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *s, T_A *u, std::int64_t ldu, - T_A *vt, std::int64_t ldvt, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu, + T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldu, ldvt, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto s_ = reinterpret_cast(s); - auto u_ = reinterpret_cast(u); - auto vt_ = reinterpret_cast(vt); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto s_ = reinterpret_cast(s); + auto u_ = reinterpret_cast(u); + auto vt_ = reinterpret_cast(vt); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status 
err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_jobsvd(jobu), get_rocsolver_jobsvd(jobvt), m, n, a_, lda, s_, u_, ldu, @@ -1438,11 +1438,11 @@ inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, } #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event gesvd(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *s, \ - TYPE_A *u, std::int64_t ldu, TYPE_A *vt, std::int64_t ldvt, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s, \ + TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gesvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, \ u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); \ } @@ -1455,25 +1455,25 @@ GESVD_LAUNCHER_USM(std::complex, double, rocsolver_zgesvd) #undef GESVD_LAUNCHER_USM template -inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_B *&w, T_A *&scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), 
queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1486,10 +1486,10 @@ inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, } #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event heevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ - std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1500,27 +1500,27 @@ HEEVD_LAUNCHER_USM(std::complex, double, rocsolver_zheevd) #undef HEEVD_LAUNCHER_USM template -inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, 
std::int64_t n, T_A *&a, - std::int64_t lda, T_A *&b, std::int64_t ldb, T_B *&w, T_A *&scratchpad, +inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -1533,11 +1533,11 @@ inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, s } #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, 
std::int64_t n, TYPE_A *a, std::int64_t lda, \ - TYPE_A *b, std::int64_t ldb, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda, \ + TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, \ b, ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -1548,24 +1548,24 @@ HEGVD_LAUNCHER_USM(std::complex, double, rocsolver_zhegvd) #undef HEGVD_LAUNCHER_USM template -inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T_A *a, std::int64_t lda, T_B *d, - T_B *e, T_A *tau, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d, + T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto d_ 
= reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -1575,10 +1575,10 @@ inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, } #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event hetrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tau, TYPE_A *scratchpad, \ + sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hetrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1588,36 +1588,36 @@ HETRD_LAUNCHER_USM(std::complex, double, rocsolver_zhetrd) #undef HETRD_LAUNCHER_USM -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } -sycl::event hetrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } template -inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -1627,10 +1627,10 @@ inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, } #define ORGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& 
dependencies) { \ return orgbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1641,21 +1641,21 @@ ORGBR_LAUNCHER_USM(double, rocsolver_dorgbr) #undef ORGBR_LAUNCHER_USM template -inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -1664,9 +1664,9 @@ inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, s } #define ORGQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* 
a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1677,21 +1677,21 @@ ORGQR_LAUNCHER_USM(double, rocsolver_dorgqr) #undef ORGQR_LAUNCHER_USM template -inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -1701,9 +1701,9 @@ inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, } #define ORGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + 
sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1714,24 +1714,24 @@ ORGTR_LAUNCHER_USM(double, rocsolver_dorgtr) #undef ORGTR_LAUNCHER_USM template -inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -1742,11 +1742,11 @@ inline sycl::event ormtr(const char 
*func_name, Func func, sycl::queue &queue, } #define ORMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ormtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, \ lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -1756,37 +1756,37 @@ ORMTR_LAUNCHER_USM(double, rocsolver_dormtr) #undef ORMTR_LAUNCHER_USM -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } -sycl::event ormrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + 
std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } template -inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -1796,14 +1796,14 @@ inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, return done; } -#define ORMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ormqr(sycl::queue &queue, oneapi::math::side side, 
oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ - tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ +#define ORMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ + sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ + tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } ORMQR_LAUNCHER_USM(float, rocsolver_sormqr) @@ -1812,22 +1812,22 @@ ORMQR_LAUNCHER_USM(double, rocsolver_dormqr) #undef ORMQR_LAUNCHER_USM template -inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - 
onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -1839,9 +1839,9 @@ inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, } #define POTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1854,23 +1854,23 @@ POTRF_LAUNCHER_USM(std::complex, rocsolver_zpotrf) #undef POTRF_LAUNCHER_USM template -inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = 
(int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -1882,9 +1882,9 @@ inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, } #define POTRI_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potri(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1897,22 +1897,22 @@ POTRI_LAUNCHER_USM(std::complex, rocsolver_zpotri) #undef POTRI_LAUNCHER_USM template -inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, +inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb); @@ -1922,10 +1922,10 @@ inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, } #define POTRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potrs(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, TYPE *b, std::int64_t ldb, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1938,24 +1938,24 @@ POTRS_LAUNCHER_USM(std::complex, rocsolver_zpotrs) #undef POTRS_LAUNCHER_USM template -inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T *a, - std::int64_t 
lda, T *w, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1968,10 +1968,10 @@ inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, } #define SYEVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event syevd(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ - std::int64_t n, TYPE *a, std::int64_t lda, TYPE *w, TYPE *scratchpad, \ + sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - 
const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1982,26 +1982,26 @@ SYEVD_LAUNCHER_USM(double, rocsolver_dsyevd) #undef SYEVD_LAUNCHER_USM template -inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *w, T *scratchpad, +inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, 
handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -2013,13 +2013,13 @@ inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, s return done; } -#define SYGVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, \ - oneapi::math::uplo uplo, std::int64_t n, TYPE *a, std::int64_t lda, TYPE *b, \ - std::int64_t ldb, TYPE *w, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, \ - b, ldb, w, scratchpad, scratchpad_size, dependencies); \ +#define SYGVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ + sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \ + std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, \ + b, ldb, w, scratchpad, scratchpad_size, dependencies); \ } SYGVD_LAUNCHER_USM(float, rocsolver_ssygvd) @@ -2028,23 +2028,23 @@ SYGVD_LAUNCHER_USM(double, rocsolver_dsygvd) #undef SYGVD_LAUNCHER_USM template -inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *d, T *e, - T *tau, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, + T* e, T* tau, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler 
&cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -2054,10 +2054,10 @@ inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, } #define SYTRD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sytrd(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *d, TYPE *e, TYPE *tau, TYPE *scratchpad, \ + sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2068,30 +2068,30 @@ SYTRD_LAUNCHER_USM(double, rocsolver_dsytrd) #undef SYTRD_LAUNCHER_USM template -inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* 
a, std::int64_t lda, + std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); + int* devInfo = (int*)malloc_device(sizeof(int), queue); // rocsolver legacy api does not accept 64-bit ints. // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, ipiv_, devInfo_); @@ -2099,7 +2099,7 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -2112,13 +2112,13 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, return done_casting; } -#define 
SYTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sytrf(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ - scratchpad, scratchpad_size, dependencies); \ +#define SYTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ + sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ + scratchpad, scratchpad_size, dependencies); \ } SYTRF_LAUNCHER_USM(float, rocsolver_ssytrf) @@ -2128,50 +2128,50 @@ SYTRF_LAUNCHER_USM(std::complex, rocsolver_zsytrf) #undef SYTRF_LAUNCHER_USM -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, 
oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } template -inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const 
std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -2181,10 +2181,10 @@ inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, } #define UNGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungbr(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2195,21 +2195,21 @@ UNGBR_LAUNCHER_USM(std::complex, rocsolver_zungbr) #undef UNGBR_LAUNCHER_USM template -inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event 
ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -2218,9 +2218,9 @@ inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, s } #define UNGQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2231,21 +2231,21 @@ UNGQR_LAUNCHER_USM(std::complex, rocsolver_zungqr) #undef UNGQR_LAUNCHER_USM template -inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - 
T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -2255,9 +2255,9 @@ inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, } #define UNGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungtr(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2267,39 +2267,39 @@ UNGTR_LAUNCHER_USM(std::complex, rocsolver_zungtr) #undef UNGTR_LAUNCHER_USM -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, 
oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } -sycl::event unmrq(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } template -inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; 
overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -2309,14 +2309,14 @@ inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, return done; } -#define UNMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event unmqr(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ - std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ - return unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ - tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ +#define UNMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ + sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ + tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } 
UNMQR_LAUNCHER_USM(std::complex, rocsolver_cunmqr) @@ -2325,24 +2325,24 @@ UNMQR_LAUNCHER_USM(std::complex, rocsolver_zunmqr) #undef UNMQR_LAUNCHER_USM template -inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemath_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -2353,11 +2353,11 @@ inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, } #define UNMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event unmtr(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t 
lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return unmtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, \ lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2384,22 +2384,22 @@ GEBRD_LAUNCHER_SCRATCH(std::complex) #undef GEBRD_LAUNCHER_SCRATCH template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } @@ -2418,12 +2418,12 @@ GEQRF_LAUNCHER_SCRATCH(std::complex) #undef GEQRF_LAUNCHER_SCRATCH -#define GESVD_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t gesvd_scratchpad_size( \ - sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, \ - std::int64_t 
n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \ - return std::min(m, n) - 1; \ +#define GESVD_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t gesvd_scratchpad_size( \ + sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \ + return std::min(m, n) - 1; \ } GESVD_LAUNCHER_SCRATCH(float) @@ -2448,27 +2448,27 @@ GETRF_LAUNCHER_SCRATCH(std::complex) #undef GETRF_LAUNCHER_SCRATCH template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } #define GETRS_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t getrs_scratchpad_size(sycl::queue & queue, oneapi::math::transpose trans, \ + std::int64_t getrs_scratchpad_size(sycl::queue & queue, oneapi::math::transpose trans, \ std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ std::int64_t ldb) { \ return 0; \ @@ -2481,12 +2481,12 @@ GETRS_LAUNCHER_SCRATCH(std::complex) #undef GETRS_LAUNCHER_SCRATCH -#define HEEVD_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define 
HEEVD_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t heevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ oneapi::math::uplo uplo, std::int64_t n, \ - std::int64_t lda) { \ - return n; \ + std::int64_t lda) { \ + return n; \ } HEEVD_LAUNCHER_SCRATCH(std::complex) @@ -2497,7 +2497,7 @@ HEEVD_LAUNCHER_SCRATCH(std::complex) #define HEGVD_LAUNCHER_SCRATCH(TYPE) \ template <> \ std::int64_t hegvd_scratchpad_size(sycl::queue & queue, std::int64_t itype, \ - oneapi::math::job jobz, oneapi::math::uplo uplo, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ return n; \ } @@ -2507,11 +2507,11 @@ HEGVD_LAUNCHER_SCRATCH(std::complex) #undef HEGVD_LAUNCHER_SCRATCH -#define HETRD_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define HETRD_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t hetrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } HETRD_LAUNCHER_SCRATCH(std::complex) @@ -2520,22 +2520,23 @@ HETRD_LAUNCHER_SCRATCH(std::complex) #undef HETRD_LAUNCHER_SCRATCH template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } -#define ORGBR_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define ORGBR_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t orgbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ - std::int64_t m, std::int64_t n, 
std::int64_t k, \ - std::int64_t lda) { \ - return 0; \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + return 0; \ } ORGBR_LAUNCHER_SCRATCH(float) @@ -2543,11 +2544,11 @@ ORGBR_LAUNCHER_SCRATCH(double) #undef ORGBR_LAUNCHER_SCRATCH -#define ORGTR_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define ORGTR_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t orgtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } ORGTR_LAUNCHER_SCRATCH(float) @@ -2568,26 +2569,26 @@ ORGQR_LAUNCHER_SCRATCH(double) #undef ORGQR_LAUNCHER_SCRATCH template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "ormrq_scratchpad_size"); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::math::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "ormrq_scratchpad_size"); } -#define ORMQRF_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t ormqr_scratchpad_size( \ - sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, \ - std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ - return 0; \ +#define ORMQRF_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t ormqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + return 0; \ } ORMQRF_LAUNCHER_SCRATCH(float) @@ -2597,10 +2598,10 
@@ ORMQRF_LAUNCHER_SCRATCH(double) #define ORMTR_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t ormtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ - oneapi::math::uplo uplo, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t lda, \ - std::int64_t ldc) { \ + std::int64_t ormtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ return 0; \ } @@ -2609,11 +2610,11 @@ ORMTR_LAUNCHER_SCRATCH(double) #undef ORMTR_LAUNCHER_SCRATCH -#define POTRF_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define POTRF_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t potrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } POTRF_LAUNCHER_SCRATCH(float) @@ -2625,7 +2626,7 @@ POTRF_LAUNCHER_SCRATCH(std::complex) #define POTRS_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t potrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t potrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ std::int64_t ldb) { \ return 0; \ @@ -2638,11 +2639,11 @@ POTRS_LAUNCHER_SCRATCH(std::complex) #undef POTRS_LAUNCHER_SCRATCH -#define POTRI_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define POTRI_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t potri_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } POTRI_LAUNCHER_SCRATCH(float) @@ -2652,11 +2653,11 @@ POTRI_LAUNCHER_SCRATCH(std::complex) #undef POTRI_LAUNCHER_SCRATCH -#define SYTRF_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define SYTRF_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t 
sytrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } SYTRF_LAUNCHER_SCRATCH(float) @@ -2666,12 +2667,12 @@ SYTRF_LAUNCHER_SCRATCH(std::complex) #undef SYTRF_LAUNCHER_SCRATCH -#define SYEVD_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define SYEVD_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t syevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ oneapi::math::uplo uplo, std::int64_t n, \ - std::int64_t lda) { \ - return n; \ + std::int64_t lda) { \ + return n; \ } SYEVD_LAUNCHER_SCRATCH(float) @@ -2682,7 +2683,7 @@ SYEVD_LAUNCHER_SCRATCH(double) #define SYGVD_LAUNCHER_SCRATCH(TYPE) \ template <> \ std::int64_t sygvd_scratchpad_size(sycl::queue & queue, std::int64_t itype, \ - oneapi::math::job jobz, oneapi::math::uplo uplo, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ return n; \ } @@ -2692,11 +2693,11 @@ SYGVD_LAUNCHER_SCRATCH(double) #undef SYGVD_LAUNCHER_SCRATCH -#define SYTRD_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define SYTRD_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t sytrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } SYTRD_LAUNCHER_SCRATCH(float) @@ -2704,13 +2705,13 @@ SYTRD_LAUNCHER_SCRATCH(double) #undef SYTRD_LAUNCHER_SCRATCH -#define TRTRS_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t trtrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - oneapi::math::transpose trans, oneapi::math::diag diag, \ - std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ - std::int64_t ldb) { \ - return 0; \ +#define TRTRS_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t trtrs_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, \ + 
oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ + std::int64_t ldb) { \ + return 0; \ } TRTRS_LAUNCHER_SCRATCH(float) @@ -2720,12 +2721,12 @@ TRTRS_LAUNCHER_SCRATCH(std::complex) #undef TRTRS_LAUNCHER_SCRATCH -#define UNGBR_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define UNGBR_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t ungbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ - std::int64_t m, std::int64_t n, std::int64_t k, \ - std::int64_t lda) { \ - return 0; \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + return 0; \ } UNGBR_LAUNCHER_SCRATCH(std::complex) @@ -2745,11 +2746,11 @@ UNGQR_LAUNCHER_SCRATCH(std::complex) #undef UNGQR_LAUNCHER_SCRATCH -#define UNGTR_LAUNCHER_SCRATCH(TYPE) \ - template <> \ +#define UNGTR_LAUNCHER_SCRATCH(TYPE) \ + template <> \ std::int64_t ungtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ - std::int64_t n, std::int64_t lda) { \ - return 0; \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ } UNGTR_LAUNCHER_SCRATCH(std::complex) @@ -2758,7 +2759,7 @@ UNGTR_LAUNCHER_SCRATCH(std::complex) #undef UNGTR_LAUNCHER_SCRATCH template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2766,20 +2767,18 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "unmrq_scratchpad_size"); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { +std::int64_t unmrq_scratchpad_size>( + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, 
std::int64_t ldc) { throw unimplemented("lapack", "unmrq_scratchpad_size"); } -#define UNMQR_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t unmqr_scratchpad_size( \ - sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, \ - std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ - return 0; \ +#define UNMQR_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t unmqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + return 0; \ } UNMQR_LAUNCHER_SCRATCH(std::complex) @@ -2789,10 +2788,10 @@ UNMQR_LAUNCHER_SCRATCH(std::complex) #define UNMTR_LAUNCHER_SCRATCH(TYPE) \ template <> \ - std::int64_t unmtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ - oneapi::math::uplo uplo, oneapi::math::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t lda, \ - std::int64_t ldc) { \ + std::int64_t unmtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ return 0; \ } diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp index 306c43f38..264515d6e 100644 --- a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp @@ -46,7 +46,7 @@ thread_local rocsolver_handle RocsolverScopedContextHandler::handle_ #endif RocsolverScopedContextHandler::RocsolverScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : ih(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -76,8 +76,8 @@ RocsolverScopedContextHandler::~RocsolverScopedContextHandler() noexcept(false) delete placedContext_; } -void 
ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -95,7 +95,7 @@ void ContextCallback(void *userData) { } } -rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue& queue) { auto hipDevice = ih.get_native_device(); hipError_t hipErr; hipCtx_t desired; @@ -142,10 +142,10 @@ rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue &queu return handle; } -hipStream_t RocsolverScopedContextHandler::get_stream(const sycl::queue &queue) { +hipStream_t RocsolverScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context RocsolverScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context RocsolverScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp index e1036e034..4d3b12a8c 100644 --- a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp @@ -52,23 +52,23 @@ namespace rocsolver { class RocsolverScopedContextHandler { hipCtx_t original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMATH_PI_INTERFACE_REMOVED static thread_local rocsolver_handle handle_helper; #else static thread_local rocsolver_handle handle_helper; #endif - hipStream_t get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + hipStream_t get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - RocsolverScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + 
RocsolverScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~RocsolverScopedContextHandler() noexcept(false); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. template diff --git a/src/lapack/backends/rocsolver/rocsolver_task.hpp b/src/lapack/backends/rocsolver/rocsolver_task.hpp index 061e34504..c89734bd3 100644 --- a/src/lapack/backends/rocsolver/rocsolver_task.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_task.hpp @@ -51,9 +51,9 @@ namespace lapack { namespace rocsolver { template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](cl::sycl::interop_handle ih) { #endif @@ -63,7 +63,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } template -static inline void onemath_rocsolver_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemath_rocsolver_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/lapack/function_table.hpp b/src/lapack/function_table.hpp index 9f72752bd..93c56450f 100644 --- a/src/lapack/function_table.hpp +++ b/src/lapack/function_table.hpp @@ -32,1808 +32,1812 @@ typedef struct { int version; - void (*cgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgebrd_sycl)(sycl::queue &queue, std::int64_t m, 
std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*sgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*cgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer 
&scratchpad, - std::int64_t scratchpad_size); - void (*zgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetrs_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void 
(*dgetrs_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetrs_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetrs_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgesvd_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgesvd_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cgesvd_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zgesvd_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, 
std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cheevd_sycl)(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zheevd_sycl)(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chegvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, + void (*cgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*sgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + 
void (*cgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*zgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetri_sycl)(sycl::queue& queue, std::int64_t n, 
sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgetrs_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size); + void (*sgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zgesvd_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cheevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zheevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*chegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zhegvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, + void (*zhegvd_sycl)(sycl::queue& queue, std::int64_t itype, 
oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chetrd_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zhetrd_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chetrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zhetrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*sorgbr_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dorgbr_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dorgqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sorgqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void 
(*sorgtr_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dorgtr_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sormtr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*chetrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zhetrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*chetrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zhetrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*sorgbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dorgbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + 
void (*dorgqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sorgqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sorgtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dorgtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sormtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dormtr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + void (*dormtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*sormrq_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dormrq_sycl)(sycl::queue &queue, oneapi::math::side side, 
oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dormqr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*sormqr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*spotrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*spotri_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotri_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotri_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotri_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*spotrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dsyevd_sycl)(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*ssyevd_sycl)(sycl::queue &queue, oneapi::math::job jobz, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dsygvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*ssygvd_sycl)(sycl::queue &queue, 
std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dsytrd_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*ssytrd_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*ssytrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dsytrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*csytrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zsytrf_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*ctrtrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*sormrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t 
ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dormrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dormqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*sormqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*spotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zpotrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*spotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zpotri_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*spotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zpotrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dsyevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*ssyevd_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dsygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& 
b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*ssygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dsytrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*ssytrd_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*ssytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dsytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*csytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zsytrf_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*ctrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + 
sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*dtrtrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + void (*dtrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*strtrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + void (*strtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*ztrtrs_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + void (*ztrtrs_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungbr_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungbr_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - 
sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungtr_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungtr_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cunmrq_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungbr_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t 
lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungtr_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cunmrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmrq_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, + void (*zunmrq_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cunmqr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, + void (*cunmqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmqr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::transpose trans, + void (*zunmqr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, 
std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cunmtr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + void (*cunmtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmtr_sycl)(sycl::queue &queue, oneapi::math::side side, oneapi::math::uplo uplo, + void (*zunmtr_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - sycl::event (*cgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, - double *taup, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, 
- const std::vector &dependencies); - sycl::event (*zgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + sycl::event (*cgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, + double* taup, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - 
std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies); - sycl::event (*sgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies); + sycl::event (*cgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_usm_sycl)(sycl::queue& queue, 
std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrs_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t *ipiv, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrs_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t *ipiv, float *b, std::int64_t ldb, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrs_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgesvd_usm_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + const std::vector& dependencies); + sycl::event (*zgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t 
n, std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t* ipiv, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t* ipiv, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgetrs_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgesvd_usm_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *s, float *u, std::int64_t ldu, - float *vt, std::int64_t ldvt, float *scratchpad, + float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgesvd_usm_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + const std::vector& dependencies); + sycl::event (*cgesvd_usm_sycl)(sycl::queue& 
queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgesvd_usm_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgesvd_usm_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cheevd_usm_sycl)(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cheevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zheevd_usm_sycl)(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zheevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* 
a, + std::int64_t lda, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chegvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*chegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhegvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zhegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chetrd_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*chetrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhetrd_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex 
*tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zhetrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chetrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhetrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgbr_usm_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgbr_usm_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgtr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies); + 
sycl::event (*chetrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zhetrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgtr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dorgtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, 
std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormtr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*sormtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormtr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*dormtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormrq_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*sormrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormrq_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*dormrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *c, std::int64_t ldc, double *scratchpad, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormqr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*dormqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *c, std::int64_t ldc, double *scratchpad, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormqr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*sormqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + const std::vector& dependencies); + sycl::event (*spotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& dependencies); + sycl::event 
(*dpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotri_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotri_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotri_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, 
std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotri_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotri_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsyevd_usm_sycl)(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *w, double *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsyevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssyevd_usm_sycl)(sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *w, float *scratchpad, + const std::vector& dependencies); + sycl::event (*ssyevd_usm_sycl)(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsygvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double 
*b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssygvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsytrd_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssytrd_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssytrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsytrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*csytrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zsytrf_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ctrtrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + const std::vector& dependencies); + sycl::event 
(*dsygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsytrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssytrd_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*csytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zsytrf_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies); + sycl::event (*ctrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dtrtrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dtrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*strtrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + const std::vector& dependencies); + sycl::event (*strtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ztrtrs_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + const std::vector& dependencies); + sycl::event (*ztrtrs_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungbr_usm_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungbr_usm_sycl)(sycl::queue &queue, oneapi::math::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cungbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zungbr_usm_sycl)(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cungqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex 
*tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungtr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cungtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungtr_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungtr_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmrq_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*cunmrq_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmrq_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*zunmrq_usm_sycl)(sycl::queue& queue, 
oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmqr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*cunmqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmqr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*zunmqr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmtr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + const std::vector& dependencies); + sycl::event (*cunmtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmtr_usm_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zunmtr_usm_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - void (*sgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + void (*sgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void 
(*cgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*cgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*zgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, + void (*sgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, + void (*dgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*cgetri_batch_sycl)(sycl::queue& queue, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*zgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetrs_batch_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, + void (*sgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dgetrs_batch_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void 
(*cgetrs_batch_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*cgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetrs_batch_sycl)(sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + void (*zgetrs_batch_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*sgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - 
std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*cgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*zgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sorgqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*sorgqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dorgqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - 
std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dorgqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*spotrf_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*spotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dpotrf_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cpotrf_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + void (*cpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zpotrf_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + void (*zpotrf_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void 
(*spotrs_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + void (*spotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dpotrs_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*cpotrs_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*cpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zpotrs_batch_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + void (*zpotrs_batch_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + 
sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cungqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*cungqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zungqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*zungqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - sycl::event (*sgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + sycl::event (*sgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies); + sycl::event 
(*dgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*cgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*zgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event 
(*dgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*cgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*zgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, float *a, + const std::vector& dependencies); + sycl::event (*sgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, 
float *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, double *a, + const std::vector& dependencies); + sycl::event (*dgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, + const std::vector& dependencies); + sycl::event (*cgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*zgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, + const std::vector& dependencies); + sycl::event (*sgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, 
+ std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, + const std::vector& dependencies); + sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*cgetrs_batch_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); sycl::event (*zgetrs_batch_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t 
n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, + const std::vector& dependencies); + sycl::event (*dorgqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, - std::int64_t scratchpad_size, - 
const std::vector &dependencies); - sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, 
- std::int64_t stride_a, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, + const std::vector& dependencies); + sycl::event (*spotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, std::int64_t stride_a, float* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, + const std::vector& dependencies); + 
sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*cungqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*zungqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, 
double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, 
std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, 
std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t 
*group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::math::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - 
std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*zgetrs_group_usm_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, - double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, + double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrf_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*spotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies); - sycl::event (*zpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrs_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_group_usm_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); + sycl::event (*spotrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrs_group_usm_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); sycl::event (*cpotrs_group_usm_sycl)( - sycl::queue &queue, 
oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies); sycl::event (*zpotrs_group_usm_sycl)( - sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies); - sycl::event (*cungqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies); + sycl::event (*cungqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - 
std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); - std::int64_t (*sgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t 
(*zgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t 
(*zgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::jobsvd jobu, + std::int64_t (*zgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*sgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*sgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*dgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*dgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*cgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*cgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*zgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*zgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*sgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t 
(*sgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::transpose trans, + std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::job jobz, + std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::job jobz, + std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*chegvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*chegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zhegvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*zhegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t 
(*chetrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*chetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::generate vect, + std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::generate vect, + std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*sorgqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, 
std::int64_t k, std::int64_t lda); - std::int64_t (*dorgqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t 
(*spotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*spotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::job jobz, + std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t 
(*dsyevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::job jobz, + std::int64_t (*dsyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssygvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*ssygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dsygvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*dsygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue& queue, 
oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::generate vect, + std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::generate vect, + std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*cungqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*zungqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cunmrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t 
(*cunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::side side, + std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t 
(*dgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*cgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*zgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*zgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*sgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*sgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*dgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*dgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*cgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*cgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*zgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*zgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); std::int64_t (*sgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, 
oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*dgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*cgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*zgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*sgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*dgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t 
(*cgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*zgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*zgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - 
std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo uplo, + std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*sorgqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sorgqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*dorgqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dorgqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*cungqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cungqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*zungqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + 
std::int64_t (*zungqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*sgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t (*sgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*zgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*sgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t 
group_count, - std::int64_t *group_sizes); - std::int64_t (*zgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); + std::int64_t (*sgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*dgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*cgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*zgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); std::int64_t (*sgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*dgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*cgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& 
queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*zgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); - std::int64_t (*sgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); + std::int64_t (*sgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*zgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*sorgqr_group_scratchpad_size_sycl)(sycl::queue &queue, 
std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dorgqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*sorgqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*dorgqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t 
(*zpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t 
(*cungqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zungqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); + std::int64_t (*cungqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*zungqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); } lapack_function_table_t; diff --git a/src/lapack/lapack_loader.cpp b/src/lapack/lapack_loader.cpp index 3283d458c..410c04575 100644 --- a/src/lapack/lapack_loader.cpp +++ b/src/lapack/lapack_loader.cpp @@ -30,2063 +30,2065 @@ namespace detail { static oneapi::math::detail::table_initializer function_tables; -void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size) { +void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, 
std::int64_t scratchpad_size) { +void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +void getrs(oneapi::math::device libkey, 
sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, 
std::int64_t scratchpad_size) { +void getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void 
gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, +void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, +void gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void heevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, +void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue 
}].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void heevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, +void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, +void hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, 
scratchpad_size); } -void hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, 
sycl::buffer> &scratchpad, +void hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +void orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ 
libkey, queue }].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ormtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - 
sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, 
queue }].spotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue 
}].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sygvd(oneapi::math::device libkey, 
sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, 
scratchpad, scratchpad_size); } -void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag 
diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ztrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void ungbr(oneapi::math::device libkey, 
sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, 
std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void unmrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, 
std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +void unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, 
std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float 
*d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double 
*tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event 
geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, 
scratchpad_size, dependencies); } -sycl::event getrf(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - 
std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t *ipiv, 
double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t* ipiv, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, 
queue }].zgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *s, double *u, std::int64_t ldu, double *vt, - std::int64_t ldvt, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, - oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex *a, - 
std::int64_t lda, float *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::jobsvd jobu, +sycl::event gesvd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, std::complex *u, - std::int64_t ldu, std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - float *w, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, 
std::int64_t lda, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cheevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zheevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, +sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].chegvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, +sycl::event hegvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - 
std::int64_t ldb, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhegvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].chetrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhetrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t 
lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].chetrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhetrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, 
std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies) { +sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, 
oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormrq_usm_sycl(queue, side, trans, m, n, k, a, 
lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event ormqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, 
oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(oneapi::math::device libkey, 
sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies) { +sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsyevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, float *a, 
std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event syevd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssyevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsygvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sygvd(oneapi::math::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sygvd(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssygvd_usm_sycl( queue, itype, jobz, uplo, n, 
a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsytrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrd(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssytrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, 
dependencies); } -sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].csytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zsytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event 
trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, +sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, +sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t nrhs, double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, +sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t nrhs, float* a, std::int64_t lda, float* 
b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, +sycl::event trtrs(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex 
*scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungbr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungqr(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - 
std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(oneapi::math::device libkey, 
sycl::queue &queue, oneapi::math::side side, +sycl::event unmrq(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event unmqr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + 
std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::side side, +sycl::event unmtr(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -void 
geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
function_tables[{ libkey, queue }].cgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t 
nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, +void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t 
scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, +void getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void 
orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
function_tables[{ libkey, queue }].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, +void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, +void potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, 
oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, +void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - 
sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, +void potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +void ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +void ungqr_batch(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - float *tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, + float* tau, std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - double *tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, 
stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { 
+sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].cgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, 
std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, 
batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, 
stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, 
- std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, 
std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::complex* 
a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].dpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::math::device libkey, 
sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, 
float** tau, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - 
std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_group_usm_sycl( 
queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event 
getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::math::device libkey, sycl::queue 
&queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { 
+sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getrs_batch(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* 
lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, std::int64_t *lda, - double **tau, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double** a, std::int64_t* lda, + double** tau, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } 
-sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* 
group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, std::int64_t* lda, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo 
*uplo, - std::int64_t *n, std::int64_t *nrhs, double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { 
+ const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } template <> -std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gebrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gerqf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ 
libkey, queue }].dgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t geqrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, 
sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { @@ -2094,7 +2096,7 @@ std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::que lda, ldu, ldvt); } template <> -std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { @@ -2103,7 +2105,7 @@ std::int64_t gesvd_scratchpad_size(oneapi::math::device libkey, sycl::qu } template <> std::int64_t gesvd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -2113,7 +2115,7 @@ std::int64_t gesvd_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t gesvd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -2122,58 +2124,58 @@ std::int64_t gesvd_scratchpad_size>(oneapi::math::device li lda, ldu, ldvt); } template <> -std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgetrf_scratchpad_size_sycl(queue, m, n, lda); } 
template <> std::int64_t getrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgetri_scratchpad_size_sycl(queue, n, lda); } template <> -std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgetri_scratchpad_size_sycl(queue, n, lda); } template <> -std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return 
function_tables[{ libkey, queue }].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); } template <> -std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, @@ -2181,7 +2183,7 @@ std::int64_t getrs_scratchpad_size(oneapi::math::device libkey, sycl::qu } template <> std::int64_t getrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2190,7 +2192,7 @@ std::int64_t getrs_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t getrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2199,7 +2201,7 @@ std::int64_t getrs_scratchpad_size>(oneapi::math::device li } template <> std::int64_t heevd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::job jobz, + sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, @@ -2207,7 +2209,7 @@ std::int64_t heevd_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t heevd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::job jobz, + sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, @@ -2215,7 +2217,7 @@ std::int64_t 
heevd_scratchpad_size>(oneapi::math::device li } template <> std::int64_t hegvd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2224,7 +2226,7 @@ std::int64_t hegvd_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t hegvd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2233,68 +2235,70 @@ std::int64_t hegvd_scratchpad_size>(oneapi::math::device li } template <> std::int64_t hetrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue }].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue 
}].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); } template <> -std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); } template <> -std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> -std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgqr_scratchpad_size(oneapi::math::device libkey, 
sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> -std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2302,7 +2306,7 @@ std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::que k, lda, ldc); } template <> -std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2310,7 +2314,7 @@ std::int64_t ormrq_scratchpad_size(oneapi::math::device libkey, sycl::qu k, lda, ldc); } template <> -std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2318,7 +2322,7 @@ std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::que k, lda, ldc); } template <> -std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2326,7 +2330,7 @@ std::int64_t ormqr_scratchpad_size(oneapi::math::device libkey, sycl::qu k, lda, ldc); } template <> -std::int64_t ormtr_scratchpad_size(oneapi::math::device 
libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2334,7 +2338,7 @@ std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::que m, n, lda, ldc); } template <> -std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2342,38 +2346,39 @@ std::int64_t ormtr_scratchpad_size(oneapi::math::device libkey, sycl::qu m, n, lda, ldc); } template <> -std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + 
oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue }].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb) { +std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, @@ -2381,7 +2386,7 @@ std::int64_t potrs_scratchpad_size(oneapi::math::device libkey, sycl::qu } template <> std::int64_t potrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, @@ -2389,76 +2394,79 @@ std::int64_t potrs_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t potrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb) { return function_tables[{ libkey, queue }].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); } template <> 
-std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].spotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potri_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue }].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> 
std::int64_t sytrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue }].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); } template <> -std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); } template <> -std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2466,35 +2474,35 @@ std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::que n, lda, ldb); } template <> -std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sygvd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t itype, 
oneapi::math::job jobz, - oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, - std::int64_t ldb) { + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].dsygvd_scratchpad_size_sycl(queue, itype, jobz, uplo, n, lda, ldb); } template <> -std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t ldb) { + oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].strtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2503,7 +2511,7 @@ std::int64_t trtrs_scratchpad_size(oneapi::math::device libkey, sycl::qu } template <> std::int64_t 
trtrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -2512,18 +2520,16 @@ std::int64_t trtrs_scratchpad_size>(oneapi::math::device lib n, nrhs, lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - oneapi::math::transpose trans, - oneapi::math::diag diag, std::int64_t n, - std::int64_t nrhs, std::int64_t lda, - std::int64_t ldb) { +std::int64_t trtrs_scratchpad_size>( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].ztrtrs_scratchpad_size_sycl(queue, uplo, trans, diag, n, nrhs, lda, ldb); } template <> std::int64_t ungbr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { @@ -2532,42 +2538,43 @@ std::int64_t ungbr_scratchpad_size>(oneapi::math::device lib } template <> std::int64_t ungbr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, - oneapi::math::generate vect, std::int64_t m, - std::int64_t n, std::int64_t k, - std::int64_t lda) { + sycl::queue& queue, + oneapi::math::generate vect, + std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].zungbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue 
}].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::uplo uplo, - std::int64_t n, std::int64_t lda) { + sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { return function_tables[{ libkey, queue }].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2576,18 +2583,16 @@ std::int64_t unmrq_scratchpad_size>(oneapi::math::device lib k, lda, ldc); } template <> -std::int64_t unmrq_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { +std::int64_t unmrq_scratchpad_size>( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { return function_tables[{ libkey, queue }].zunmrq_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, ldc); } template <> std::int64_t 
unmqr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, + sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2596,18 +2601,16 @@ std::int64_t unmqr_scratchpad_size>(oneapi::math::device lib k, lda, ldc); } template <> -std::int64_t unmqr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, - std::int64_t ldc) { +std::int64_t unmqr_scratchpad_size>( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { return function_tables[{ libkey, queue }].zunmqr_scratchpad_size_sycl(queue, side, trans, m, n, k, lda, ldc); } template <> std::int64_t unmtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, + sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, @@ -2616,17 +2619,15 @@ std::int64_t unmtr_scratchpad_size>(oneapi::math::device lib m, n, lda, ldc); } template <> -std::int64_t unmtr_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, oneapi::math::side side, - oneapi::math::uplo uplo, - oneapi::math::transpose trans, - std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc) { +std::int64_t unmtr_scratchpad_size>( + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc) { return function_tables[{ libkey, queue }].zunmtr_scratchpad_size_sycl(queue, side, uplo, trans, m, n, lda, ldc); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device 
libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2634,7 +2635,7 @@ std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, syc queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2643,20 +2644,20 @@ std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, 
std::int64_t batch_size) { @@ -2664,7 +2665,7 @@ std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, syc queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2673,20 +2674,20 @@ std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetri_batch_scratchpad_size_sycl( queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetri_batch_scratchpad_size_sycl( queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2696,7 +2697,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, syc queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t 
getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2707,7 +2708,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetrs_batch_scratchpad_size_sycl( @@ -2715,14 +2716,14 @@ std::int64_t getrs_batch_scratchpad_size>( } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose trans, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2730,7 +2731,7 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, syc queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> 
-std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2739,20 +2740,20 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgeqrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgeqrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { @@ -2760,7 +2761,7 @@ std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, syc queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, 
std::int64_t stride_a, std::int64_t batch_size) { @@ -2769,20 +2770,20 @@ std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cpotrf_batch_scratchpad_size_sycl( queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zpotrf_batch_scratchpad_size_sycl( queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, @@ -2791,7 +2792,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, syc queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, @@ -2801,7 +2802,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue 
&queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cpotrs_batch_scratchpad_size_sycl( @@ -2809,14 +2810,14 @@ std::int64_t potrs_batch_scratchpad_size>( } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo uplo, std::int64_t n, + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2824,7 +2825,7 @@ std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, syc queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2833,245 +2834,245 @@ std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sy } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cungqr_batch_scratchpad_size_sycl( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::math::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zungqr_batch_scratchpad_size_sycl( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, 
std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t 
*group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - 
oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t geqrf_batch_scratchpad_size(oneapi::math::device 
libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::math::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sorgqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t 
orgqr_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dorgqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].spotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template 
<> std::int64_t potrf_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].spotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue &queue, - oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(oneapi::math::device libkey, sycl::queue& queue, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t 
*nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, oneapi::math::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cungqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::math::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::math::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* 
group_sizes) { return function_tables[{ libkey, queue }].zungqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } diff --git a/src/rng/backends/curand/curand_task.hpp b/src/rng/backends/curand/curand_task.hpp index 6f9fa5613..4bd9c812a 100644 --- a/src/rng/backends/curand/curand_task.hpp +++ b/src/rng/backends/curand/curand_task.hpp @@ -15,18 +15,18 @@ namespace rng { namespace curand { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { curandStatus_t status; CURAND_CALL(curandSetStream, status, e, ih.get_native_queue()); auto r_ptr = - reinterpret_cast(ih.get_native_mem(acc)); + reinterpret_cast(ih.get_native_mem(acc)); f(r_ptr); }); } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { curandStatus_t status; CURAND_CALL(curandSetStream, status, e, ih.get_native_queue()); @@ -35,16 +35,16 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #else template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif curandStatus_t status; auto stream = ih.get_native_queue(); CURAND_CALL(curandSetStream, status, e, stream); - auto r_ptr = reinterpret_cast( + auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); f(r_ptr); #ifndef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND @@ -55,9 +55,9 @@ static inline void host_task_internal(H &cgh, A acc, E e, F f) { } template -static inline void host_task_internal(H 
&cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif @@ -73,12 +73,12 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #endif template -static inline void onemath_curand_host_task(H &cgh, A acc, E e, F f) { +static inline void onemath_curand_host_task(H& cgh, A acc, E e, F f) { host_task_internal(cgh, acc, e, f); } template -static inline void onemath_curand_host_task(H &cgh, Engine e, F f) { +static inline void onemath_curand_host_task(H& cgh, Engine e, F f) { host_task_internal(cgh, e, f); } diff --git a/src/rng/backends/curand/mrg32k3a.cpp b/src/rng/backends/curand/mrg32k3a.cpp index 4e2b6d263..14ce97adc 100644 --- a/src/rng/backends/curand/mrg32k3a.cpp +++ b/src/rng/backends/curand/mrg32k3a.cpp @@ -97,12 +97,12 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { mrg32k3a_impl(sycl::queue queue, std::initializer_list seed) : oneapi::math::rng::detail::engine_impl(queue) { throw oneapi::math::unimplemented("rng", "mrg32ka engine", - "multi-seed unsupported by cuRAND backend"); + "multi-seed unsupported by cuRAND backend"); } mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other) { throw oneapi::math::unimplemented("rng", "mrg32ka engine", - "copy construction unsupported by cuRAND backend"); + "copy construction unsupported by cuRAND backend"); } // Buffers API @@ -120,9 +120,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, 
oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { queue_.submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) { @@ -160,9 +160,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { queue_.submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) { @@ -249,9 +249,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { "ICDF method not used for pseudorandom generators in cuRAND backend"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented( "rng", "mrg32ka engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -312,7 +312,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) { @@ -362,7 +363,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t 
n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) { @@ -434,8 +436,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); return queue_.submit([&](sycl::handler& cgh) { @@ -457,7 +459,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented( "rng", "mrg32ka engine", @@ -523,7 +526,7 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { virtual void skip_ahead(std::initializer_list num_to_skip) override { throw oneapi::math::unimplemented("rng", "skip_ahead", - "initializer list unsupported by cuRAND backend"); + "initializer list unsupported by cuRAND backend"); } virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { @@ -563,9 +566,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -581,9 +584,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::uniform& 
distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -629,9 +632,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -670,7 +673,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -692,7 +696,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -737,8 +742,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -752,7 +757,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - 
const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -818,7 +824,7 @@ oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std:: } oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { return new mrg32k3a_impl(queue, seed); } diff --git a/src/rng/backends/curand/philox4x32x10.cpp b/src/rng/backends/curand/philox4x32x10.cpp index 375be1d6d..7f2c829ec 100644 --- a/src/rng/backends/curand/philox4x32x10.cpp +++ b/src/rng/backends/curand/philox4x32x10.cpp @@ -119,13 +119,13 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { philox4x32x10_impl(sycl::queue queue, std::initializer_list seed) : oneapi::math::rng::detail::engine_impl(queue) { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine", - "multi-seed unsupported by cuRAND backend"); + "multi-seed unsupported by cuRAND backend"); } philox4x32x10_impl(const philox4x32x10_impl* other) : oneapi::math::rng::detail::engine_impl(*other) { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine", - "copy construction unsupported by cuRAND backend"); + "copy construction unsupported by cuRAND backend"); } // Buffers API @@ -143,9 +143,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { queue_.submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) { @@ 
-183,9 +183,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { queue_.submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); onemath_curand_host_task(cgh, acc, engine_, [=](double* r_ptr) { @@ -272,9 +272,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { "ICDF method not used for pseudorandom generators in cuRAND backend"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented( "rng", "philox4x32x10 engine", "ICDF method not used for pseudorandom generators in cuRAND backend"); @@ -335,7 +335,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) { @@ -384,7 +385,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); sycl::event generate_event = queue_.submit([&](sycl::handler& cgh) { @@ -456,8 +458,8 @@ class 
philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); return queue_.submit([&](sycl::handler& cgh) { @@ -479,7 +481,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented( "rng", "philox4x32x10 engine", @@ -545,7 +548,7 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { virtual void skip_ahead(std::initializer_list num_to_skip) override { throw oneapi::math::unimplemented("rng", "skip_ahead", - "initializer list unsupported by cuRAND backend"); + "initializer list unsupported by cuRAND backend"); } virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { @@ -585,9 +588,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -603,9 +606,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, 
oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -651,9 +654,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -692,7 +695,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -714,7 +718,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -759,8 +764,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -774,7 +779,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const 
oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -835,7 +841,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { }; #endif -oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, + std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } diff --git a/src/rng/backends/mklcpu/cpu_common.hpp b/src/rng/backends/mklcpu/cpu_common.hpp index b83dfef6b..559f27960 100644 --- a/src/rng/backends/mklcpu/cpu_common.hpp +++ b/src/rng/backends/mklcpu/cpu_common.hpp @@ -34,19 +34,19 @@ namespace mklcpu { // host_task automatically uses run_on_host_intel if it is supported by the // compiler. Otherwise, it falls back to single_task. template -static inline auto host_task_internal(H &cgh, F f, int) -> decltype(cgh.host_task(f)) { +static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) { return cgh.host_task(f); } template -static inline void host_task_internal(H &cgh, F f, long) { +static inline void host_task_internal(H& cgh, F f, long) { #ifndef __SYCL_DEVICE_ONLY__ cgh.template single_task(f); #endif } template -static inline void host_task(H &cgh, F f) { +static inline void host_task(H& cgh, F f) { (void)host_task_internal(cgh, f, 0); } @@ -57,7 +57,7 @@ template class kernel_name_usm {}; template -typename Acc::value_type *get_raw_ptr(Acc acc) { +typename Acc::value_type* get_raw_ptr(Acc acc) { // Workaround for AdaptiveCPP, as they do not yet support the get_multi_ptr function #ifndef __HIPSYCL__ return acc.template get_multi_ptr().get_raw(); diff --git a/src/rng/backends/mklcpu/mrg32k3a.cpp b/src/rng/backends/mklcpu/mrg32k3a.cpp index 89af18bd6..3bd79c400 100644 --- a/src/rng/backends/mklcpu/mrg32k3a.cpp +++ 
b/src/rng/backends/mklcpu/mrg32k3a.cpp @@ -576,7 +576,7 @@ oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std:: } oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { return new mrg32k3a_impl(queue, seed); } diff --git a/src/rng/backends/mklcpu/philox4x32x10.cpp b/src/rng/backends/mklcpu/philox4x32x10.cpp index 58b4c3aee..144ced995 100644 --- a/src/rng/backends/mklcpu/philox4x32x10.cpp +++ b/src/rng/backends/mklcpu/philox4x32x10.cpp @@ -573,7 +573,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { std::int32_t state_size_; }; -oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, + std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } diff --git a/src/rng/backends/mklgpu/mrg32k3a.cpp b/src/rng/backends/mklgpu/mrg32k3a.cpp index 849f78761..bb31f2a38 100644 --- a/src/rng/backends/mklgpu/mrg32k3a.cpp +++ b/src/rng/backends/mklgpu/mrg32k3a.cpp @@ -43,119 +43,138 @@ namespace mklgpu { class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { public: mrg32k3a_impl(sycl::queue queue, std::uint32_t seed) - : oneapi::math::rng::detail::engine_impl(queue), engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) { - } + : oneapi::math::rng::detail::engine_impl(queue), + engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) {} mrg32k3a_impl(sycl::queue queue, std::initializer_list seed) - : oneapi::math::rng::detail::engine_impl(queue), engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) { - } + : oneapi::math::rng::detail::engine_impl(queue), + engine_((oneapi::mkl::rng::mrg32k3a(queue, seed))) {} - mrg32k3a_impl(const mrg32k3a_impl* other) : oneapi::math::rng::detail::engine_impl(*other), engine_((oneapi::mkl::rng::mrg32k3a(other->engine_))) { - } + mrg32k3a_impl(const mrg32k3a_impl* other) 
+ : oneapi::math::rng::detail::engine_impl(*other), + engine_((oneapi::mkl::rng::mrg32k3a(other->engine_))) {} // Buffers API virtual void generate( const oneapi::math::rng::uniform& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::uniform< std::int32_t, oneapi::math::rng::uniform_method::standard>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::uniform& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::uniform< + double, 
oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::gaussian< float, oneapi::math::rng::gaussian_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::gaussian< double, oneapi::math::rng::gaussian_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::lognormal< float, oneapi::math::rng::lognormal_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + 
oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::lognormal< double, oneapi::math::rng::lognormal_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bernoulli& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bernoulli& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const 
poisson& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const poisson& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bits& /*distr*/, std::int64_t /*n*/, sycl::buffer& /*r*/) override { - throw unimplemented("rng/mklgpu", "mrg32k3a::generate", "bits distribution is not supported"); + throw unimplemented("rng/mklgpu", "mrg32k3a::generate", + "bits distribution is not supported"); } // USM APIs @@ -163,13 +182,16 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { virtual sycl::event generate( const oneapi::math::rng::uniform& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( @@ -177,100 +199,119 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { distr, 
std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { ; - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::uniform& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, 
r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + 
RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate(const bernoulli& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate(const bernoulli& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const poisson& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const poisson& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } - virtual sycl::event generate(const bits& /*distr*/, std::int64_t /*n*/, std::uint32_t* /*r*/, + virtual sycl::event generate(const bits& /*distr*/, std::int64_t /*n*/, + std::uint32_t* /*r*/, const std::vector& /*dependencies*/) override { - throw unimplemented("rng/mklgpu", "mrg32k3a::generate", "bits distribution is not supported"); + throw unimplemented("rng/mklgpu", "mrg32k3a::generate", + "bits distribution is not supported"); return {}; } @@ -301,7 +342,7 @@ oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std:: } oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { return new mrg32k3a_impl(queue, seed); } diff --git a/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp b/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp index d1d16e293..67447110f 100644 --- a/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp +++ b/src/rng/backends/mklgpu/onemkl_distribution_conversion.hpp @@ -80,42 +80,44 @@ struct convert_distrib_t; template struct convert_distrib_t> { -auto operator()(uniform distribution) { - using onemkl_method_t = typename convert_method_t::type; - return oneapi::mkl::rng::uniform(distribution.a(), distribution.b()); -} + auto operator()(uniform distribution) { + using onemkl_method_t = typename convert_method_t::type; + return oneapi::mkl::rng::uniform(distribution.a(), distribution.b()); + } }; template struct convert_distrib_t> { -auto operator()(gaussian distribution) { - using onemkl_method_t = typename convert_method_t::type; - return oneapi::mkl::rng::gaussian(distribution.mean(), distribution.stddev()); -} + auto operator()(gaussian distribution) { + 
using onemkl_method_t = typename convert_method_t::type; + return oneapi::mkl::rng::gaussian(distribution.mean(), + distribution.stddev()); + } }; template struct convert_distrib_t> { -auto operator()(lognormal distribution) { - using onemkl_method_t = typename convert_method_t::type; - return oneapi::mkl::rng::lognormal(distribution.m(), distribution.s(), distribution.displ(), distribution.scale()); -} + auto operator()(lognormal distribution) { + using onemkl_method_t = typename convert_method_t::type; + return oneapi::mkl::rng::lognormal( + distribution.m(), distribution.s(), distribution.displ(), distribution.scale()); + } }; template struct convert_distrib_t> { -auto operator()(bernoulli distribution) { - using onemkl_method_t = typename convert_method_t::type; - return oneapi::mkl::rng::bernoulli(distribution.p()); -} + auto operator()(bernoulli distribution) { + using onemkl_method_t = typename convert_method_t::type; + return oneapi::mkl::rng::bernoulli(distribution.p()); + } }; template struct convert_distrib_t> { -auto operator()(poisson distribution) { - using onemkl_method_t = typename convert_method_t::type; - return oneapi::mkl::rng::poisson(distribution.lambda()); -} + auto operator()(poisson distribution) { + using onemkl_method_t = typename convert_method_t::type; + return oneapi::mkl::rng::poisson(distribution.lambda()); + } }; template @@ -123,9 +125,9 @@ inline auto get_onemkl_distribution(DistributionT distribution) { return convert_distrib_t()(distribution); } -} // namespace detail -} // namespace rng -} // namespace math -} // namespace oneapi +} // namespace detail +} // namespace rng +} // namespace math +} // namespace oneapi #endif // _ONEMATH_SRC_RNG_ONEMKL_DISTRIBUTION_CONVERSION_HPP_ diff --git a/src/rng/backends/mklgpu/philox4x32x10.cpp b/src/rng/backends/mklgpu/philox4x32x10.cpp index 36fa9e621..87cdb42fd 100644 --- a/src/rng/backends/mklgpu/philox4x32x10.cpp +++ b/src/rng/backends/mklgpu/philox4x32x10.cpp @@ -43,120 +43,138 @@ 
namespace mklgpu { class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { public: philox4x32x10_impl(sycl::queue queue, std::uint64_t seed) - : oneapi::math::rng::detail::engine_impl(queue), engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) { - } + : oneapi::math::rng::detail::engine_impl(queue), + engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) {} philox4x32x10_impl(sycl::queue queue, std::initializer_list seed) - : oneapi::math::rng::detail::engine_impl(queue), engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) { - } + : oneapi::math::rng::detail::engine_impl(queue), + engine_((oneapi::mkl::rng::philox4x32x10(queue, seed))) {} philox4x32x10_impl(const philox4x32x10_impl* other) - : oneapi::math::rng::detail::engine_impl(*other), engine_((oneapi::mkl::rng::philox4x32x10(other->engine_))) { - } + : oneapi::math::rng::detail::engine_impl(*other), + engine_((oneapi::mkl::rng::philox4x32x10(other->engine_))) {} // Buffers API virtual void generate( const oneapi::math::rng::uniform& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::uniform< std::int32_t, oneapi::math::rng::uniform_method::standard>& distr, std::int64_t n, sycl::buffer& r) override { - 
RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::uniform& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::gaussian< float, oneapi::math::rng::gaussian_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::gaussian< double, oneapi::math::rng::gaussian_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, sycl::buffer& r) override { - 
RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::lognormal< float, oneapi::math::rng::lognormal_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const oneapi::math::rng::lognormal< double, oneapi::math::rng::lognormal_method::box_muller2>& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + virtual void generate(const oneapi::math::rng::lognormal< + double, 
oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bernoulli& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bernoulli& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const poisson& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const poisson& distr, std::int64_t n, sycl::buffer& r) override { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r)); } virtual void generate(const bits& /*distr*/, std::int64_t /*n*/, sycl::buffer& /*r*/) override { - throw unimplemented("rng/mklgpu", "philox4x32x10::generate", "bits distribution is not supported"); + throw unimplemented("rng/mklgpu", "philox4x32x10::generate", + "bits distribution is not supported"); } // USM APIs @@ -164,13 +182,16 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { virtual sycl::event generate( const oneapi::math::rng::uniform& distr, std::int64_t n, float* r, 
const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( @@ -178,100 +199,119 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { ; - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::uniform& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + 
RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::gaussian& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const oneapi::math::rng::lognormal& distr, std::int64_t n, float* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate(const bernoulli& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual 
sycl::event generate(const bernoulli& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const poisson& distr, std::int64_t n, std::int32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } virtual sycl::event generate( const poisson& distr, std::int64_t n, std::uint32_t* r, const std::vector& dependencies) override { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate(detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::rng::generate( + detail::get_onemkl_distribution(distr), engine_, n, r, dependencies)); } - virtual sycl::event generate(const bits& /*distr*/, std::int64_t /*n*/, std::uint32_t* /*r*/, + virtual sycl::event generate(const bits& /*distr*/, std::int64_t /*n*/, + std::uint32_t* /*r*/, const std::vector& /*dependencies*/) override { - throw unimplemented("rng/mklgpu", "philox4x32x10::generate", "bits distribution is not supported"); + throw unimplemented("rng/mklgpu", "philox4x32x10::generate", + "bits distribution is not supported"); return {}; } @@ -297,7 +337,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { oneapi::mkl::rng::philox4x32x10 engine_; }; -oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, + std::uint64_t seed) { return new 
philox4x32x10_impl(queue, seed); } diff --git a/src/rng/backends/rocrand/mrg32k3a.cpp b/src/rng/backends/rocrand/mrg32k3a.cpp index 20d62b33f..741c45432 100644 --- a/src/rng/backends/rocrand/mrg32k3a.cpp +++ b/src/rng/backends/rocrand/mrg32k3a.cpp @@ -99,7 +99,7 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { mrg32k3a_impl(sycl::queue queue, std::initializer_list seed) : oneapi::math::rng::detail::engine_impl(queue) { throw oneapi::math::unimplemented("rng", "mrg32ka engine", - "multi-seed unsupported by rocRAND backend"); + "multi-seed unsupported by rocRAND backend"); } mrg32k3a_impl(const mrg32k3a_impl* other) @@ -134,9 +134,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -189,9 +189,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -326,9 +326,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { increment_internal_offset(n); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, 
oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -425,7 +425,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); queue_ @@ -481,7 +482,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); queue_ @@ -587,8 +589,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); auto event = queue_.submit([&](sycl::handler& cgh) { @@ -622,7 +624,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); auto event = queue_.submit([&](sycl::handler& cgh) { @@ -716,7 +719,7 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { virtual void skip_ahead(std::initializer_list num_to_skip) override { throw oneapi::math::unimplemented("rng", "skip_ahead", - "initializer list unsupported by rocRAND backend"); + "initializer list unsupported by rocRAND backend"); } virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { @@ 
-761,9 +764,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -779,9 +782,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -827,9 +830,9 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); } @@ -868,7 +871,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -890,7 +894,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const 
oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -935,8 +940,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -950,7 +955,8 @@ class mrg32k3a_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "mrg32ka engine"); return sycl::event{}; @@ -1016,7 +1022,7 @@ oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, std:: } oneapi::math::rng::detail::engine_impl* create_mrg32k3a(sycl::queue queue, - std::initializer_list seed) { + std::initializer_list seed) { return new mrg32k3a_impl(queue, seed); } diff --git a/src/rng/backends/rocrand/philox4x32x10.cpp b/src/rng/backends/rocrand/philox4x32x10.cpp index 6753b9c1b..682aac11e 100644 --- a/src/rng/backends/rocrand/philox4x32x10.cpp +++ b/src/rng/backends/rocrand/philox4x32x10.cpp @@ -121,7 +121,7 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { philox4x32x10_impl(sycl::queue queue, std::initializer_list seed) : oneapi::math::rng::detail::engine_impl(queue) { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine", - "multi-seed unsupported by rocRAND backend"); + "multi-seed unsupported by rocRAND backend"); } philox4x32x10_impl(const philox4x32x10_impl* other) @@ -156,9 +156,9 @@ class philox4x32x10_impl : public 
oneapi::math::rng::detail::engine_impl { range_transform_fp(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -211,9 +211,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { range_transform_fp_accurate(queue_, distr.a(), distr.b(), n, r); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -348,9 +348,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { increment_internal_offset(n); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { queue_ .submit([&](sycl::handler& cgh) { auto acc = r.get_access(cgh); @@ -447,7 +447,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); queue_ @@ -503,7 +504,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, 
std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); queue_ @@ -609,8 +611,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); auto event = queue_.submit([&](sycl::handler& cgh) { @@ -644,7 +646,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { sycl::event::wait_and_throw(dependencies); auto event = queue_.submit([&](sycl::handler& cgh) { @@ -737,7 +740,7 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { virtual void skip_ahead(std::initializer_list num_to_skip) override { throw oneapi::math::unimplemented("rng", "skip_ahead", - "initializer list unsupported by rocRAND backend"); + "initializer list unsupported by rocRAND backend"); } virtual void leapfrog(std::uint64_t idx, std::uint64_t stride) override { @@ -783,9 +786,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::standard>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -801,9 +804,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - 
const oneapi::math::rng::uniform& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::uniform< + double, oneapi::math::rng::uniform_method::accurate>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -849,9 +852,9 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } - virtual void generate( - const oneapi::math::rng::lognormal& distr, - std::int64_t n, sycl::buffer& r) override { + virtual void generate(const oneapi::math::rng::lognormal< + double, oneapi::math::rng::lognormal_method::icdf>& distr, + std::int64_t n, sycl::buffer& r) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); } @@ -890,7 +893,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -912,7 +916,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::uniform& distr, + const oneapi::math::rng::uniform& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -957,8 +962,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& - distr, + const oneapi::math::rng::lognormal& distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -972,7 +977,8 @@ class 
philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { } virtual sycl::event generate( - const oneapi::math::rng::lognormal& distr, + const oneapi::math::rng::lognormal& + distr, std::int64_t n, double* r, const std::vector& dependencies) override { throw oneapi::math::unimplemented("rng", "philox4x32x10 engine"); return sycl::event{}; @@ -1033,7 +1039,8 @@ class philox4x32x10_impl : public oneapi::math::rng::detail::engine_impl { }; #endif -oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, std::uint64_t seed) { +oneapi::math::rng::detail::engine_impl* create_philox4x32x10(sycl::queue queue, + std::uint64_t seed) { return new philox4x32x10_impl(queue, seed); } diff --git a/src/rng/backends/rocrand/rocrand_task.hpp b/src/rng/backends/rocrand/rocrand_task.hpp index 149c0a70d..d7ece6c45 100644 --- a/src/rng/backends/rocrand/rocrand_task.hpp +++ b/src/rng/backends/rocrand/rocrand_task.hpp @@ -15,18 +15,18 @@ namespace rng { namespace rocrand { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { rocrand_status status; ROCRAND_CALL(rocrand_set_stream, status, e, ih.get_native_queue()); auto r_ptr = - reinterpret_cast(ih.get_native_mem(acc)); + reinterpret_cast(ih.get_native_mem(acc)); f(r_ptr); }); } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { rocrand_status status; ROCRAND_CALL(rocrand_set_stream, status, e, ih.get_native_queue()); @@ -35,16 +35,16 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #else template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - 
cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif rocrand_status status; auto stream = ih.get_native_queue(); ROCRAND_CALL(rocrand_set_stream, status, e, stream); - auto r_ptr = reinterpret_cast( + auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); f(r_ptr); #ifndef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND @@ -55,9 +55,9 @@ static inline void host_task_internal(H &cgh, A acc, E e, F f) { } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif @@ -73,12 +73,12 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #endif template -static inline void onemath_rocrand_host_task(H &cgh, A acc, E e, F f) { +static inline void onemath_rocrand_host_task(H& cgh, A acc, E e, F f) { host_task_internal(cgh, acc, e, f); } template -static inline void onemath_rocrand_host_task(H &cgh, Engine e, F f) { +static inline void onemath_rocrand_host_task(H& cgh, Engine e, F f) { host_task_internal(cgh, e, f); } diff --git a/src/rng/function_table.hpp b/src/rng/function_table.hpp index da3ca1b6f..06c9c83e1 100644 --- a/src/rng/function_table.hpp +++ b/src/rng/function_table.hpp @@ -33,12 +33,12 @@ typedef struct { int version; oneapi::math::rng::detail::engine_impl* (*create_philox4x32x10_sycl)(sycl::queue queue, - std::uint64_t seed); + std::uint64_t seed); oneapi::math::rng::detail::engine_impl* (*create_philox4x32x10_ex_sycl)( sycl::queue queue, std::initializer_list seed); oneapi::math::rng::detail::engine_impl* (*create_mrg32k3a_sycl)(sycl::queue queue, - std::uint32_t seed); + std::uint32_t seed); 
oneapi::math::rng::detail::engine_impl* (*create_mrg32k3a_ex_sycl)( sycl::queue queue, std::initializer_list seed); } rng_function_table_t; diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index 509b5a78a..bd39bba4c 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -19,22 +19,22 @@ // Dense vector template -void init_dense_vector(sycl::queue & /*queue*/, - oneapi::math::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, +void init_dense_vector(sycl::queue& /*queue*/, + oneapi::math::sparse::dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val) { *p_dvhandle = new oneapi::math::sparse::dense_vector_handle(val, size); } template -void init_dense_vector(sycl::queue & /*queue*/, - oneapi::math::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, - fpType *val) { +void init_dense_vector(sycl::queue& /*queue*/, + oneapi::math::sparse::dense_vector_handle_t* p_dvhandle, std::int64_t size, + fpType* val) { *p_dvhandle = new oneapi::math::sparse::dense_vector_handle(val, size); } template -void check_can_reset_value_handle(const std::string &function_name, - InternalHandleT *internal_handle, bool expect_buffer) { +void check_can_reset_value_handle(const std::string& function_name, + InternalHandleT* internal_handle, bool expect_buffer) { if (internal_handle->get_value_type() != detail::get_data_type()) { throw oneapi::math::invalid_argument( "sparse_blas", function_name, @@ -49,7 +49,7 @@ void check_can_reset_value_handle(const std::string &function_name, } template -void set_dense_vector_data(sycl::queue & /*queue*/, +void set_dense_vector_data(sycl::queue& /*queue*/, oneapi::math::sparse::dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val) { check_can_reset_value_handle(__func__, dvhandle, true); @@ -58,40 +58,40 @@ void set_dense_vector_data(sycl::queue & /*queue*/, } 
template -void set_dense_vector_data(sycl::queue & /*queue*/, +void set_dense_vector_data(sycl::queue& /*queue*/, oneapi::math::sparse::dense_vector_handle_t dvhandle, std::int64_t size, - fpType *val) { + fpType* val) { check_can_reset_value_handle(__func__, dvhandle, false); dvhandle->size = size; dvhandle->set_usm_ptr(val); } -#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ - template void init_dense_vector( \ +#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_vector( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, sycl::buffer val); \ - template void init_dense_vector( \ + std::int64_t size, sycl::buffer val); \ + template void init_dense_vector( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE * val); \ - template void set_dense_vector_data( \ + std::int64_t size, FP_TYPE* val); \ + template void set_dense_vector_data( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, sycl::buffer val); \ - template void set_dense_vector_data( \ + std::int64_t size, sycl::buffer val); \ + template void set_dense_vector_data( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE * val) + std::int64_t size, FP_TYPE* val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); #undef INSTANTIATE_DENSE_VECTOR_FUNCS -sycl::event release_dense_vector(sycl::queue &queue, +sycl::event release_dense_vector(sycl::queue& queue, oneapi::math::sparse::dense_vector_handle_t dvhandle, - const std::vector &dependencies) { + const std::vector& dependencies) { return detail::submit_release(queue, dvhandle, dependencies); } // Dense matrix template -void init_dense_matrix(sycl::queue & /*queue*/, - oneapi::math::sparse::dense_matrix_handle_t *p_dmhandle, +void init_dense_matrix(sycl::queue& /*queue*/, + 
oneapi::math::sparse::dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::math::layout dense_layout, sycl::buffer val) { *p_dmhandle = @@ -99,16 +99,16 @@ void init_dense_matrix(sycl::queue & /*queue*/, } template -void init_dense_matrix(sycl::queue & /*queue*/, - oneapi::math::sparse::dense_matrix_handle_t *p_dmhandle, +void init_dense_matrix(sycl::queue& /*queue*/, + oneapi::math::sparse::dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - oneapi::math::layout dense_layout, fpType *val) { + oneapi::math::layout dense_layout, fpType* val) { *p_dmhandle = new oneapi::math::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); } template -void set_dense_matrix_data(sycl::queue & /*queue*/, +void set_dense_matrix_data(sycl::queue& /*queue*/, oneapi::math::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::math::layout dense_layout, sycl::buffer val) { @@ -121,10 +121,10 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, } template -void set_dense_matrix_data(sycl::queue & /*queue*/, +void set_dense_matrix_data(sycl::queue& /*queue*/, oneapi::math::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - oneapi::math::layout dense_layout, fpType *val) { + oneapi::math::layout dense_layout, fpType* val) { check_can_reset_value_handle(__func__, dmhandle, false); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; @@ -133,35 +133,35 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, dmhandle->set_usm_ptr(val); } -#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ - template void init_dense_matrix( \ +#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_matrix( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t 
num_cols, std::int64_t ld, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ oneapi::math::layout dense_layout, sycl::buffer val); \ - template void init_dense_matrix( \ + template void init_dense_matrix( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::math::layout dense_layout, FP_TYPE * val); \ - template void set_dense_matrix_data( \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::math::layout dense_layout, FP_TYPE* val); \ + template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ oneapi::math::layout dense_layout, sycl::buffer val); \ - template void set_dense_matrix_data( \ + template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::math::layout dense_layout, FP_TYPE * val) + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::math::layout dense_layout, FP_TYPE* val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); #undef INSTANTIATE_DENSE_MATRIX_FUNCS -sycl::event release_dense_matrix(sycl::queue &queue, +sycl::event release_dense_matrix(sycl::queue& queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, - const std::vector &dependencies) { + const std::vector& dependencies) { return detail::submit_release(queue, dmhandle, dependencies); } // COO matrix template -void init_coo_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t *p_smhandle, +void init_coo_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, 
sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { @@ -169,32 +169,35 @@ void init_coo_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t * RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&mkl_handle)); auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data(queue, mkl_handle, static_cast(num_rows), - static_cast(num_cols), static_cast(nnz), - detail::get_onemkl_index_base(index), internal_smhandle->row_container.get_buffer(), - internal_smhandle->col_container.get_buffer(), - internal_smhandle->value_container.get_buffer())); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), + static_cast(nnz), detail::get_onemkl_index_base(index), + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer())); *p_smhandle = reinterpret_cast(internal_smhandle); } template -void init_coo_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t *p_smhandle, +void init_coo_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - oneapi::math::index_base index, intType *row_ind, intType *col_ind, - fpType *val) { + oneapi::math::index_base index, intType* row_ind, intType* col_ind, + fpType* val) { oneapi::mkl::sparse::matrix_handle_t mkl_handle; RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&mkl_handle)); auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); sycl::event event; - RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_coo_data(queue, mkl_handle, static_cast(num_rows), 
static_cast(num_cols), - static_cast(nnz), detail::get_onemkl_index_base(index), row_ind, col_ind, val)); + RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_coo_data( + queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + detail::get_onemkl_index_base(index), row_ind, col_ind, val)); event.wait_and_throw(); *p_smhandle = reinterpret_cast(internal_smhandle); } template -void check_can_reset_sparse_handle(const std::string &function_name, - detail::sparse_matrix_handle *internal_smhandle, +void check_can_reset_sparse_handle(const std::string& function_name, + detail::sparse_matrix_handle* internal_smhandle, bool expect_buffer) { check_can_reset_value_handle(function_name, internal_smhandle, expect_buffer); if (internal_smhandle->get_int_type() != detail::get_data_type()) { @@ -212,7 +215,7 @@ void check_can_reset_sparse_handle(const std::string &function_name, } template -void set_coo_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, +void set_coo_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { @@ -222,19 +225,20 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
- RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data(queue, internal_smhandle->backend_handle, - static_cast(num_rows), - static_cast(num_cols), static_cast(nnz), - detail::get_onemkl_index_base(index), internal_smhandle->row_container.get_buffer(), - internal_smhandle->col_container.get_buffer(), - internal_smhandle->value_container.get_buffer())); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_coo_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + detail::get_onemkl_index_base(index), + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer())); } template -void set_coo_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, +void set_coo_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - oneapi::math::index_base index, intType *row_ind, intType *col_ind, - fpType *val) { + oneapi::math::index_base index, intType* row_ind, intType* col_ind, + fpType* val) { auto internal_smhandle = detail::get_internal_handle(smhandle); check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ind); @@ -242,35 +246,36 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle internal_smhandle->value_container.set_usm_ptr(val); auto event = oneapi::mkl::sparse::set_coo_data( queue, internal_smhandle->backend_handle, static_cast(num_rows), - static_cast(num_cols), static_cast(nnz), detail::get_onemkl_index_base(index), row_ind, col_ind, val); + static_cast(num_cols), static_cast(nnz), + detail::get_onemkl_index_base(index), row_ind, col_ind, val); event.wait_and_throw(); } -#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - template void init_coo_matrix( \ - sycl::queue & queue, 
oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, sycl::buffer row_ind, \ - sycl::buffer col_ind, sycl::buffer val); \ - template void init_coo_matrix( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ - template void set_coo_matrix_data( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - sycl::buffer row_ind, sycl::buffer col_ind, \ - sycl::buffer val); \ - template void set_coo_matrix_data( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) +#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template void init_coo_matrix( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void init_coo_matrix( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \ + template void set_coo_matrix_data( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void 
set_coo_matrix_data( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS); #undef INSTANTIATE_COO_MATRIX_FUNCS // CSR matrix template -void init_csr_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t *p_smhandle, +void init_csr_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, oneapi::math::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { @@ -279,32 +284,35 @@ void init_csr_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t * auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); // The backend deduces nnz from row_ptr. // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
- RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data(queue, mkl_handle, static_cast(num_rows), - static_cast(num_cols), detail::get_onemkl_index_base(index), - internal_smhandle->row_container.get_buffer(), - internal_smhandle->col_container.get_buffer(), - internal_smhandle->value_container.get_buffer())); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), + detail::get_onemkl_index_base(index), + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer())); *p_smhandle = reinterpret_cast(internal_smhandle); } template -void init_csr_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t *p_smhandle, +void init_csr_matrix(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, - oneapi::math::index_base index, intType *row_ptr, intType *col_ind, - fpType *val) { + oneapi::math::index_base index, intType* row_ptr, intType* col_ind, + fpType* val) { oneapi::mkl::sparse::matrix_handle_t mkl_handle; RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::init_matrix_handle(&mkl_handle)); auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); // The backend deduces nnz from row_ptr. 
sycl::event event; - RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_csr_data(queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), detail::get_onemkl_index_base(index), - row_ptr, col_ind, val)); + RETHROW_ONEMKL_EXCEPTIONS(event = oneapi::mkl::sparse::set_csr_data( + queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), + detail::get_onemkl_index_base(index), row_ptr, col_ind, val)); event.wait_and_throw(); *p_smhandle = reinterpret_cast(internal_smhandle); } template -void set_csr_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, +void set_csr_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, oneapi::math::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { @@ -315,19 +323,19 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle internal_smhandle->value_container.set_buffer(val); // The backend deduces nnz from row_ptr. // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
- RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data(queue, internal_smhandle->backend_handle, - static_cast(num_rows), - static_cast(num_cols), detail::get_onemkl_index_base(index), - internal_smhandle->row_container.get_buffer(), - internal_smhandle->col_container.get_buffer(), - internal_smhandle->value_container.get_buffer())); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_csr_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), detail::get_onemkl_index_base(index), + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer())); } template -void set_csr_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, +void set_csr_matrix_data(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, - oneapi::math::index_base index, intType *row_ptr, intType *col_ind, - fpType *val) { + oneapi::math::index_base index, intType* row_ptr, intType* col_ind, + fpType* val) { auto internal_smhandle = detail::get_internal_handle(smhandle); check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ptr); @@ -336,35 +344,37 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::math::sparse::matrix_handle // The backend deduces nnz from row_ptr. 
auto event = oneapi::mkl::sparse::set_csr_data( queue, internal_smhandle->backend_handle, static_cast(num_rows), - static_cast(num_cols), detail::get_onemkl_index_base(index), row_ptr, col_ind, val); + static_cast(num_cols), detail::get_onemkl_index_base(index), row_ptr, col_ind, + val); event.wait_and_throw(); } -#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - template void init_csr_matrix( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, sycl::buffer row_ptr, \ - sycl::buffer col_ind, sycl::buffer val); \ - template void init_csr_matrix( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ - template void set_csr_matrix_data( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - sycl::buffer row_ptr, sycl::buffer col_ind, \ - sycl::buffer val); \ - template void set_csr_matrix_data( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) +#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template void init_csr_matrix( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void init_csr_matrix( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t 
num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \ + template void set_csr_matrix_data( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void set_csr_matrix_data( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS); #undef INSTANTIATE_CSR_MATRIX_FUNCS // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, - const std::vector &dependencies) { +sycl::event release_sparse_matrix(sycl::queue& queue, + oneapi::math::sparse::matrix_handle_t smhandle, + const std::vector& dependencies) { auto internal_smhandle = detail::get_internal_handle(smhandle); // Asynchronously release the backend's handle followed by the internal handle. 
auto event = oneapi::mkl::sparse::release_matrix_handle( @@ -372,7 +382,7 @@ sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::math::sparse::matr return detail::submit_release(queue, internal_smhandle, event); } -bool set_matrix_property(sycl::queue & /*queue*/, oneapi::math::sparse::matrix_handle_t smhandle, +bool set_matrix_property(sycl::queue& /*queue*/, oneapi::math::sparse::matrix_handle_t smhandle, oneapi::math::sparse::matrix_property property) { auto internal_smhandle = detail::get_internal_handle(smhandle); // Store the matrix property internally for better error checking @@ -381,12 +391,12 @@ bool set_matrix_property(sycl::queue & /*queue*/, oneapi::math::sparse::matrix_h // Backend and oneMath types for the property don't match switch (property) { case oneapi::math::sparse::matrix_property::symmetric: - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, - oneapi::mkl::sparse::property::symmetric)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property( + internal_smhandle->backend_handle, oneapi::mkl::sparse::property::symmetric)); return true; case oneapi::math::sparse::matrix_property::sorted: - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, - oneapi::mkl::sparse::property::sorted)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::set_matrix_property( + internal_smhandle->backend_handle, oneapi::mkl::sparse::property::sorted)); return true; default: return false; } diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index 8401d76c6..e7a7527f2 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -39,15 +39,15 @@ using namespace oneapi::math::detail; /// Return whether a pointer is accessible on the host template -inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T 
*host_or_device_ptr) { +inline bool is_ptr_accessible_on_host(sycl::queue& queue, const T* host_or_device_ptr) { auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context()); return alloc_type == sycl::usm::alloc::host || alloc_type == sycl::usm::alloc::shared || alloc_type == sycl::usm::alloc::unknown; } /// Throw an exception if the scalar is not accessible in the host -inline void check_ptr_is_host_accessible(const std::string &function_name, - const std::string &scalar_name, +inline void check_ptr_is_host_accessible(const std::string& function_name, + const std::string& scalar_name, bool is_ptr_accessible_on_host) { if (!is_ptr_accessible_on_host) { throw math::invalid_argument( @@ -59,7 +59,7 @@ inline void check_ptr_is_host_accessible(const std::string &function_name, /// Return a scalar on the host from a pointer to host or device memory /// Used for USM functions template -inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, +inline T get_scalar_on_host(sycl::queue& queue, const T* host_or_device_ptr, bool is_ptr_accessible_on_host) { if (is_ptr_accessible_on_host) { return *host_or_device_ptr; @@ -71,8 +71,8 @@ inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, } /// Merge multiple event dependencies into one -inline sycl::event collapse_dependencies(sycl::queue &queue, - const std::vector &dependencies) { +inline sycl::event collapse_dependencies(sycl::queue& queue, + const std::vector& dependencies) { if (dependencies.empty()) { return {}; } @@ -80,7 +80,7 @@ inline sycl::event collapse_dependencies(sycl::queue &queue, return dependencies[0]; } - return queue.submit([&](sycl::handler &cgh) { + return queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.host_task([=]() {}); }); @@ -95,7 +95,7 @@ inline sycl::event collapse_dependencies(sycl::queue &queue, case detail::data_type::complex_fp64: \ return op_functor>(__VA_ARGS__); \ default: \ - throw 
oneapi::math::exception( \ + throw oneapi::math::exception( \ "sparse_blas", function_name, \ "Internal error: unsupported type " + data_type_to_str(value_type)); \ } @@ -103,7 +103,7 @@ inline sycl::event collapse_dependencies(sycl::queue &queue, #define CHECK_DESCR_MATCH(descr, argument, optimize_func_name) \ do { \ if (descr->last_optimized_##argument != argument) { \ - throw math::invalid_argument( \ + throw math::invalid_argument( \ "sparse_blas", __func__, \ #argument " argument must match with the previous call to " #optimize_func_name); \ } \ diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 92cb6ec83..357d22bd6 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -35,16 +35,16 @@ struct spmm_descr { namespace oneapi::math::sparse::BACKEND { -void init_spmm_descr(sycl::queue & /*queue*/, oneapi::math::sparse::spmm_descr_t *p_spmm_descr) { +void init_spmm_descr(sycl::queue& /*queue*/, oneapi::math::sparse::spmm_descr_t* p_spmm_descr) { *p_spmm_descr = new spmm_descr(); } -sycl::event release_spmm_descr(sycl::queue &queue, oneapi::math::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies) { +sycl::event release_spmm_descr(sycl::queue& queue, oneapi::math::sparse::spmm_descr_t spmm_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spmm_descr, dependencies); } -void check_valid_spmm(const std::string &function_name, oneapi::math::transpose opA, +void check_valid_spmm(const std::string& function_name, oneapi::math::transpose opA, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_matrix_handle_t B_handle, @@ -67,17 +67,17 @@ void check_valid_spmm(const std::string &function_name, oneapi::math::transpose } if (B_handle->dense_layout != C_handle->dense_layout) { throw math::invalid_argument("sparse_blas", function_name, 
- "B and C matrices must used the same layout."); + "B and C matrices must used the same layout."); } if (A_view.type_view != oneapi::math::sparse::matrix_descr::general) { throw math::invalid_argument("sparse_blas", function_name, - "Matrix view's type must be `matrix_descr::general`."); + "Matrix view's type must be `matrix_descr::general`."); } if (A_view.diag_view != oneapi::math::diag::nonunit) { throw math::invalid_argument("sparse_blas", function_name, - "Matrix's diag_view must be `nonunit`."); + "Matrix's diag_view must be `nonunit`."); } #if BACKEND == gpu @@ -95,14 +95,15 @@ void check_valid_spmm(const std::string &function_name, oneapi::math::transpose #endif // BACKEND } -void spmm_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose /*opB*/, const void *alpha, +void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose /*opB*/, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg /*alg*/, - oneapi::math::sparse::spmm_descr_t spmm_descr, std::size_t &temp_buffer_size) { + oneapi::math::sparse::spmm_descr_t spmm_descr, + std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMath backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); @@ -113,9 +114,9 @@ void spmm_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, } inline void common_spmm_optimize( - sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, const void *alpha, + sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); @@ -124,7 +125,7 @@ inline void common_spmm_optimize( is_alpha_host_accessible, is_beta_host_accessible); if (!spmm_descr->buffer_size_called) { throw math::uninitialized("sparse_blas", "spmm_optimize", - "spmm_buffer_size must be called before spmm_optimize."); + "spmm_buffer_size must be called before spmm_optimize."); } spmm_descr->optimized_called = true; spmm_descr->last_optimized_opA = opA; @@ -136,12 +137,13 @@ inline void common_spmm_optimize( spmm_descr->last_optimized_alg = alg; } -void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, oneapi::math::sparse::matrix_view A_view, +void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, 
oneapi::math::sparse::dense_matrix_handle_t C_handle, - oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr, + oneapi::math::sparse::spmm_alg alg, + oneapi::math::sparse::spmm_descr_t spmm_descr, sycl::buffer /*workspace*/) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { @@ -156,15 +158,15 @@ void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math // TODO: Add support for spmm_optimize once the close-source oneMath backend supports it. } -sycl::event spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, - oneapi::math::sparse::spmm_descr_t spmm_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::math::sparse::spmm_descr_t spmm_descr, void* /*workspace*/, + const std::vector& dependencies) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -181,16 +183,16 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, template sycl::event internal_spmm( - sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, const void *alpha, + sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, const void* alpha, oneapi::math::sparse::matrix_view /*A_view*/, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const 
void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg /*alg*/, - oneapi::math::sparse::spmm_descr_t /*spmm_descr*/, const std::vector &dependencies, + oneapi::math::sparse::spmm_descr_t /*spmm_descr*/, const std::vector& dependencies, bool is_alpha_host_accessible, bool is_beta_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); T host_beta = - detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto onemkl_layout = detail::get_onemkl_layout(B_handle->dense_layout); @@ -200,27 +202,28 @@ sycl::event internal_spmm( auto ldb = B_handle->ld; auto ldc = C_handle->ld; if (internal_A_handle->all_use_buffer()) { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::gemm(queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha, - internal_A_handle->backend_handle, B_handle->get_buffer(), - columns, ldb, host_beta, C_handle->get_buffer(), ldc)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::sparse::gemm(queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha, + internal_A_handle->backend_handle, B_handle->get_buffer(), + columns, ldb, host_beta, C_handle->get_buffer(), ldc)); // Dependencies are not used for buffers return {}; } else { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::gemm(queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha, - internal_A_handle->backend_handle, - B_handle->get_usm_ptr(), columns, ldb, host_beta, - C_handle->get_usm_ptr(), ldc, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::gemm( + queue, onemkl_layout, onemkl_opa, onemkl_opb, host_alpha, + 
internal_A_handle->backend_handle, B_handle->get_usm_ptr(), columns, ldb, host_beta, + C_handle->get_usm_ptr(), ldc, dependencies)); } } -sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, oneapi::math::sparse::matrix_view A_view, +sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, @@ -228,7 +231,7 @@ sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math:: if (!spmm_descr->optimized_called) { throw math::uninitialized("sparse_blas", __func__, - "spmm_optimize must be called before spmm."); + "spmm_optimize must be called before spmm."); } CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize"); CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize"); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 5d03466fd..b434e15a4 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -34,16 +34,16 @@ struct spmv_descr { namespace oneapi::math::sparse::BACKEND { -void init_spmv_descr(sycl::queue & /*queue*/, oneapi::math::sparse::spmv_descr_t *p_spmv_descr) { +void init_spmv_descr(sycl::queue& /*queue*/, 
oneapi::math::sparse::spmv_descr_t* p_spmv_descr) { *p_spmv_descr = new spmv_descr(); } -sycl::event release_spmv_descr(sycl::queue &queue, oneapi::math::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies) { +sycl::event release_spmv_descr(sycl::queue& queue, oneapi::math::sparse::spmv_descr_t spmv_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spmv_descr, dependencies); } -void check_valid_spmv(const std::string &function_name, oneapi::math::transpose opA, +void check_valid_spmv(const std::string& function_name, oneapi::math::transpose opA, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, @@ -66,7 +66,7 @@ void check_valid_spmv(const std::string &function_name, oneapi::math::transpose } if (A_view.type_view == oneapi::math::sparse::matrix_descr::diagonal) { throw math::invalid_argument("sparse_blas", function_name, - "Matrix view's type cannot be diagonal."); + "Matrix view's type cannot be diagonal."); } if (A_view.type_view != oneapi::math::sparse::matrix_descr::triangular && @@ -85,13 +85,14 @@ void check_valid_spmv(const std::string &function_name, oneapi::math::transpose } } -void spmv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg /*alg*/, - oneapi::math::sparse::spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + oneapi::math::sparse::spmv_descr_t spmv_descr, + std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMath 
backend supports it. bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); @@ -101,11 +102,11 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const voi spmv_descr->buffer_size_called = true; } -inline void common_spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +inline void common_spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, - const void *beta, + const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr) { @@ -115,7 +116,7 @@ inline void common_spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA is_alpha_host_accessible, is_beta_host_accessible); if (!spmv_descr->buffer_size_called) { throw math::uninitialized("sparse_blas", "spmv_optimize", - "spmv_buffer_size must be called before spmv_optimize."); + "spmv_buffer_size must be called before spmv_optimize."); } spmv_descr->optimized_called = true; spmv_descr->last_optimized_opA = opA; @@ -126,12 +127,13 @@ inline void common_spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA spmv_descr->last_optimized_alg = alg; } -void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, - oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr, + 
oneapi::math::sparse::spmv_alg alg, + oneapi::math::sparse::spmv_descr_t spmv_descr, sycl::buffer /*workspace*/) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { @@ -147,8 +149,8 @@ void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void * auto onemkl_opa = detail::get_onemkl_transpose(opA); auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view); if (A_view.type_view == matrix_descr::triangular) { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, - internal_A_handle->backend_handle)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trmv( + queue, onemkl_uplo, onemkl_opa, onemkl_diag, internal_A_handle->backend_handle)); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { @@ -156,18 +158,19 @@ void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void * return; } else { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_gemv(queue, onemkl_opa, internal_A_handle->backend_handle)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_gemv( + queue, onemkl_opa, internal_A_handle->backend_handle)); } } -sycl::event spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, - oneapi::math::sparse::spmv_descr_t spmv_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::math::sparse::spmv_descr_t spmv_descr, void* /*workspace*/, + const std::vector& dependencies) { auto 
internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -182,33 +185,34 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const auto onemkl_opa = detail::get_onemkl_transpose(opA); auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view); if (A_view.type_view == matrix_descr::triangular) { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::optimize_trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, - internal_A_handle->backend_handle, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::optimize_trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, + internal_A_handle->backend_handle, dependencies)); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { return detail::collapse_dependencies(queue, dependencies); } else { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::optimize_gemv(queue, onemkl_opa, internal_A_handle->backend_handle, - dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::optimize_gemv( + queue, onemkl_opa, internal_A_handle->backend_handle, dependencies)); } } template -sycl::event internal_spmv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event internal_spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg /*alg*/, oneapi::math::sparse::spmv_descr_t /*spmv_descr*/, - const std::vector &dependencies, + const std::vector& dependencies, bool is_alpha_host_accessible, bool is_beta_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), 
is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); T host_beta = - detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto backend_handle = internal_A_handle->backend_handle; @@ -219,17 +223,18 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::math::transpose opA, const auto x_buffer = x_handle->get_buffer(); auto y_buffer = y_handle->get_buffer(); if (A_view.type_view == matrix_descr::triangular) { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, - backend_handle, x_buffer, host_beta, y_buffer)); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, + backend_handle, x_buffer, host_beta, y_buffer)); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::symv(queue, onemkl_uplo, host_alpha, backend_handle, x_buffer, - host_beta, y_buffer)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::symv( + queue, onemkl_uplo, host_alpha, backend_handle, x_buffer, host_beta, y_buffer)); } else { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::gemv(queue, onemkl_opa, host_alpha, backend_handle, x_buffer, host_beta, - y_buffer)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::gemv( + queue, onemkl_opa, host_alpha, backend_handle, x_buffer, host_beta, y_buffer)); } // Dependencies are not used for buffers return {}; @@ -238,29 +243,31 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::math::transpose opA, const auto x_usm = x_handle->get_usm_ptr(); auto y_usm = y_handle->get_usm_ptr(); if (A_view.type_view == matrix_descr::triangular) { - 
RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, - host_alpha, backend_handle, x_usm, host_beta, y_usm, - dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::trmv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, + backend_handle, x_usm, host_beta, y_usm, dependencies)); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::symv(queue, onemkl_uplo, host_alpha, backend_handle, - x_usm, host_beta, y_usm, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::symv(queue, onemkl_uplo, host_alpha, backend_handle, x_usm, + host_beta, y_usm, dependencies)); } else { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::gemv(queue, onemkl_opa, host_alpha, backend_handle, x_usm, - host_beta, y_usm, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::gemv(queue, onemkl_opa, host_alpha, backend_handle, x_usm, + host_beta, y_usm, dependencies)); } } } -sycl::event spmv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, @@ -268,7 +275,7 @@ sycl::event spmv(sycl::queue 
&queue, oneapi::math::transpose opA, const void *al if (!spmv_descr->optimized_called) { throw math::uninitialized("sparse_blas", __func__, - "spmv_optimize must be called before spmv."); + "spmv_optimize must be called before spmv."); } CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize"); CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize"); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index df998c59a..963f84927 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -34,16 +34,16 @@ struct spsv_descr { namespace oneapi::math::sparse::BACKEND { -void init_spsv_descr(sycl::queue & /*queue*/, oneapi::math::sparse::spsv_descr_t *p_spsv_descr) { +void init_spsv_descr(sycl::queue& /*queue*/, oneapi::math::sparse::spsv_descr_t* p_spsv_descr) { *p_spsv_descr = new spsv_descr(); } -sycl::event release_spsv_descr(sycl::queue &queue, oneapi::math::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies) { +sycl::event release_spsv_descr(sycl::queue& queue, oneapi::math::sparse::spsv_descr_t spsv_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spsv_descr, dependencies); } -void check_valid_spsv(const std::string &function_name, oneapi::math::transpose opA, +void check_valid_spsv(const std::string& function_name, oneapi::math::transpose opA, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, @@ -67,7 +67,7 @@ void check_valid_spsv(const std::string &function_name, oneapi::math::transpose data_type == detail::data_type::complex_fp64) && opA == oneapi::math::transpose::conjtrans) { throw math::unimplemented("sparse_blas", function_name, - "The backend does not support spsv using conjtrans."); + "The backend does not support spsv using conjtrans."); } #else (void)opA; @@ -76,7 +76,7 @@ void check_valid_spsv(const 
std::string &function_name, oneapi::math::transpose detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); if (A_view.type_view != matrix_descr::triangular) { throw math::invalid_argument("sparse_blas", function_name, - "Matrix view's type must be `matrix_descr::triangular`."); + "Matrix view's type must be `matrix_descr::triangular`."); } if (internal_A_handle->all_use_buffer()) { @@ -84,13 +84,14 @@ void check_valid_spsv(const std::string &function_name, oneapi::math::transpose } } -void spsv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, - oneapi::math::sparse::spsv_descr_t spsv_descr, std::size_t &temp_buffer_size) { + oneapi::math::sparse::spsv_descr_t spsv_descr, + std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMath backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, @@ -99,7 +100,7 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const voi spsv_descr->buffer_size_called = true; } -inline void common_spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +inline void common_spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, @@ -111,7 +112,7 @@ inline void common_spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA is_alpha_host_accessible, alg); if (!spsv_descr->buffer_size_called) { throw math::uninitialized("sparse_blas", "spsv_optimize", - "spsv_buffer_size must be called before spsv_optimize."); + "spsv_buffer_size must be called before spsv_optimize."); } spsv_descr->optimized_called = true; spsv_descr->last_optimized_opA = opA; @@ -122,12 +123,13 @@ inline void common_spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA spsv_descr->last_optimized_alg = alg; } -void spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, - oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::spsv_descr_t spsv_descr, + oneapi::math::sparse::spsv_alg alg, + oneapi::math::sparse::spsv_descr_t spsv_descr, sycl::buffer /*workspace*/) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { @@ -141,18 +143,18 @@ void spsv_optimize(sycl::queue &queue, 
oneapi::math::transpose opA, const void * auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view); auto onemkl_opa = detail::get_onemkl_transpose(opA); auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view); - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, - internal_A_handle->backend_handle)); + RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::optimize_trsv( + queue, onemkl_uplo, onemkl_opa, onemkl_diag, internal_A_handle->backend_handle)); } -sycl::event spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, - oneapi::math::sparse::spsv_descr_t spsv_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::math::sparse::spsv_descr_t spsv_descr, void* /*workspace*/, + const std::vector& dependencies) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -165,56 +167,58 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view); auto onemkl_opa = detail::get_onemkl_transpose(opA); auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view); - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::optimize_trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, - internal_A_handle->backend_handle, dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::optimize_trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, + internal_A_handle->backend_handle, dependencies)); } template -sycl::event internal_spsv(sycl::queue &queue, 
oneapi::math::transpose opA, const void *alpha, +sycl::event internal_spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg /*alg*/, oneapi::math::sparse::spsv_descr_t /*spsv_descr*/, - const std::vector &dependencies, + const std::vector& dependencies, bool is_alpha_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto onemkl_uplo = detail::get_onemkl_uplo(A_view.uplo_view); auto onemkl_opa = detail::get_onemkl_transpose(opA); auto onemkl_diag = detail::get_onemkl_diag(A_view.diag_view); if (internal_A_handle->all_use_buffer()) { - RETHROW_ONEMKL_EXCEPTIONS(oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, - internal_A_handle->backend_handle, x_handle->get_buffer(), - y_handle->get_buffer())); + RETHROW_ONEMKL_EXCEPTIONS( + oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, + internal_A_handle->backend_handle, x_handle->get_buffer(), + y_handle->get_buffer())); // Dependencies are not used for buffers return {}; } else { - RETHROW_ONEMKL_EXCEPTIONS_RET(oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, - internal_A_handle->backend_handle, - x_handle->get_usm_ptr(), y_handle->get_usm_ptr(), - dependencies)); + RETHROW_ONEMKL_EXCEPTIONS_RET( + oneapi::mkl::sparse::trsv(queue, onemkl_uplo, onemkl_opa, onemkl_diag, host_alpha, + internal_A_handle->backend_handle, x_handle->get_usm_ptr(), + y_handle->get_usm_ptr(), dependencies)); } } -sycl::event spsv(sycl::queue &queue, 
oneapi::math::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, alg); if (!spsv_descr->optimized_called) { throw math::uninitialized("sparse_blas", __func__, - "spsv_optimize must be called before spsv."); + "spsv_optimize must be called before spsv."); } CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize"); CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize"); diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index 3667ff868..6139a6381 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -24,95 +24,95 @@ #include "sparse_blas/macros.hpp" // Dense vector -#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ - void (*init_dense_vector_buffer##FP_SUFFIX)( \ +#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_vector_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, sycl::buffer val); \ - void (*init_dense_vector_usm##FP_SUFFIX)( \ + std::int64_t size, sycl::buffer val); \ + void (*init_dense_vector_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE * val); \ - void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ + std::int64_t size, FP_TYPE* val); \ + void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, 
oneapi::math::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, sycl::buffer val); \ - void (*set_dense_vector_data_usm##FP_SUFFIX)( \ + std::int64_t size, sycl::buffer val); \ + void (*set_dense_vector_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE * val) + std::int64_t size, FP_TYPE* val) // Dense matrix -#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ - void (*init_dense_matrix_buffer##FP_SUFFIX)( \ +#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_matrix_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ oneapi::math::layout dense_layout, sycl::buffer val); \ - void (*init_dense_matrix_usm##FP_SUFFIX)( \ + void (*init_dense_matrix_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::math::layout dense_layout, FP_TYPE * val); \ - void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::math::layout dense_layout, FP_TYPE* val); \ + void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ oneapi::math::layout dense_layout, sycl::buffer val); \ - void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ + void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::math::layout dense_layout, FP_TYPE * val) + std::int64_t num_rows, 
std::int64_t num_cols, std::int64_t ld, \ + oneapi::math::layout dense_layout, FP_TYPE* val) // COO matrix -#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, sycl::buffer row_ind, \ - sycl::buffer col_ind, sycl::buffer val); \ - void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ - void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - sycl::buffer row_ind, sycl::buffer col_ind, \ - sycl::buffer val); \ - void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) +#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, 
INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \ + void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) // CSR matrix -#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, sycl::buffer row_ptr, \ - sycl::buffer col_ind, sycl::buffer val); \ - void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::math::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ - void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - sycl::buffer row_ptr, sycl::buffer col_ind, \ - sycl::buffer val); \ - void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, \ - INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) +#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, 
INT_SUFFIX) \ + void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \ + void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::math::sparse::matrix_handle_t smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::math::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) typedef struct { int version; // Dense vector FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); - sycl::event (*release_dense_vector)(sycl::queue &queue, + sycl::event (*release_dense_vector)(sycl::queue& queue, oneapi::math::sparse::dense_vector_handle_t dvhandle, - const std::vector &dependencies); + const std::vector& dependencies); // Dense matrix FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); - sycl::event (*release_dense_matrix)(sycl::queue &queue, + sycl::event (*release_dense_matrix)(sycl::queue& queue, oneapi::math::sparse::dense_matrix_handle_t dmhandle, - const std::vector &dependencies); + const std::vector& dependencies); // COO matrix FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); @@ -121,117 +121,118 @@ typedef struct { FOR_EACH_FP_AND_INT_TYPE(DEFINE_CSR_MATRIX_FUNCS); 
// Common sparse matrix functions - sycl::event (*release_sparse_matrix)(sycl::queue &queue, + sycl::event (*release_sparse_matrix)(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, - const std::vector &dependencies); + const std::vector& dependencies); - bool (*set_matrix_property)(sycl::queue &queue, oneapi::math::sparse::matrix_handle_t smhandle, + bool (*set_matrix_property)(sycl::queue& queue, oneapi::math::sparse::matrix_handle_t smhandle, oneapi::math::sparse::matrix_property property); // SPMM - void (*init_spmm_descr)(sycl::queue &queue, oneapi::math::sparse::spmm_descr_t *p_spmm_descr); + void (*init_spmm_descr)(sycl::queue& queue, oneapi::math::sparse::spmm_descr_t* p_spmm_descr); - sycl::event (*release_spmm_descr)(sycl::queue &queue, + sycl::event (*release_spmm_descr)(sycl::queue& queue, oneapi::math::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spmm_buffer_size)(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, + void (*spmm_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); void (*spmm_optimize_buffer)( - sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, oneapi::math::sparse::matrix_view A_view, + sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t 
A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr, sycl::buffer workspace); - sycl::event (*spmm_optimize_usm)(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const void *alpha, + sycl::event (*spmm_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_matrix_handle_t B_handle, - const void *beta, + const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, - oneapi::math::sparse::spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies); + oneapi::math::sparse::spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spmm)(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, oneapi::math::sparse::matrix_view A_view, + sycl::event (*spmm)(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, + oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::math::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::math::sparse::dense_matrix_handle_t C_handle, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies); + const std::vector& dependencies); // SPMV - void (*init_spmv_descr)(sycl::queue &queue, oneapi::math::sparse::spmv_descr_t *p_spmv_descr); + void (*init_spmv_descr)(sycl::queue& queue, oneapi::math::sparse::spmv_descr_t* 
p_spmv_descr); - sycl::event (*release_spmv_descr)(sycl::queue &queue, + sycl::event (*release_spmv_descr)(sycl::queue& queue, oneapi::math::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spmv_buffer_size)(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + void (*spmv_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); void (*spmv_optimize_buffer)( - sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr, sycl::buffer workspace); - sycl::event (*spmv_optimize_usm)(sycl::queue &queue, oneapi::math::transpose opA, - const void *alpha, oneapi::math::sparse::matrix_view A_view, + sycl::event (*spmv_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, - const void *beta, + const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, - 
oneapi::math::sparse::spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies); + oneapi::math::sparse::spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spmv)(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + sycl::event (*spmv)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, - oneapi::math::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::math::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies); + const std::vector& dependencies); // SPSV - void (*init_spsv_descr)(sycl::queue &queue, oneapi::math::sparse::spsv_descr_t *p_spsv_descr); + void (*init_spsv_descr)(sycl::queue& queue, oneapi::math::sparse::spsv_descr_t* p_spsv_descr); - sycl::event (*release_spsv_descr)(sycl::queue &queue, + sycl::event (*release_spsv_descr)(sycl::queue& queue, oneapi::math::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spsv_buffer_size)(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + void (*spsv_buffer_size)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); - void (*spsv_optimize_buffer)(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + void (*spsv_optimize_buffer)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, 
oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, @@ -240,23 +241,23 @@ typedef struct { oneapi::math::sparse::spsv_descr_t spsv_descr, sycl::buffer workspace); - sycl::event (*spsv_optimize_usm)(sycl::queue &queue, oneapi::math::transpose opA, - const void *alpha, oneapi::math::sparse::matrix_view A_view, + sycl::event (*spsv_optimize_usm)(sycl::queue& queue, oneapi::math::transpose opA, + const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, - oneapi::math::sparse::spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies); + oneapi::math::sparse::spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spsv)(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, + sycl::event (*spsv)(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, oneapi::math::sparse::matrix_view A_view, oneapi::math::sparse::matrix_handle_t A_handle, oneapi::math::sparse::dense_vector_handle_t x_handle, oneapi::math::sparse::dense_vector_handle_t y_handle, oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies); + const std::vector& dependencies); } sparse_blas_function_table_t; #undef DEFINE_DENSE_VECTOR_FUNCS diff --git a/src/sparse_blas/generic_container.hpp b/src/sparse_blas/generic_container.hpp index 463956275..2fb91d988 100644 --- a/src/sparse_blas/generic_container.hpp +++ b/src/sparse_blas/generic_container.hpp @@ -314,10 +314,10 @@ void check_all_containers_compatible(const std::string& function_name, } const data_type other_int_type = internal_container->get_int_type(); if (other_int_type != data_type::none && other_int_type != first_int_type) { - 
throw oneapi::math::invalid_argument("sparse_blas", function_name, - "Incompatible integer types expected " + - data_type_to_str(first_int_type) + " but got " + - data_type_to_str(other_int_type)); + throw oneapi::math::invalid_argument( + "sparse_blas", function_name, + "Incompatible integer types expected " + data_type_to_str(first_int_type) + + " but got " + data_type_to_str(other_int_type)); } } } diff --git a/src/sparse_blas/macros.hpp b/src/sparse_blas/macros.hpp index 34db8efc5..aa7e43784 100644 --- a/src/sparse_blas/macros.hpp +++ b/src/sparse_blas/macros.hpp @@ -36,10 +36,10 @@ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) -#define THROW_IF_NULLPTR(FUNC_NAME, PTR) \ - if (!(PTR)) { \ +#define THROW_IF_NULLPTR(FUNC_NAME, PTR) \ + if (!(PTR)) { \ throw math::uninitialized("sparse_blas", FUNC_NAME, \ - std::string(#PTR) + " must not be nullptr."); \ + std::string(#PTR) + " must not be nullptr."); \ } #endif // _ONEMATH_SPARSE_BLAS_MACROS_HPP_ diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp index bc17b490d..2053dffcb 100644 --- a/src/sparse_blas/sparse_blas_loader.cpp +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -27,35 +27,35 @@ namespace oneapi::math::sparse { static oneapi::math::detail::table_initializer + sparse_blas_function_table_t> function_tables; // Dense vector #define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ template <> \ - void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, \ std::int64_t size, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_vector_buffer##FP_SUFFIX(queue, p_dvhandle, \ size, val); \ } \ template <> \ - void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ - std::int64_t size, FP_TYPE *val) { \ + void 
init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, \ + std::int64_t size, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_vector_usm##FP_SUFFIX(queue, p_dvhandle, \ size, val); \ } \ template <> \ - void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_vector_data_buffer##FP_SUFFIX( \ queue, dvhandle, size, val); \ } \ template <> \ - void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE *val) { \ + void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_vector_data_usm##FP_SUFFIX(queue, dvhandle, \ size, val); \ @@ -63,8 +63,8 @@ static oneapi::math::detail::table_initializer &dependencies) { +sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_dense_vector(queue, dvhandle, dependencies); } @@ -72,7 +72,7 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan // Dense matrix #define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ template <> \ - void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ layout dense_layout, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ @@ -80,15 +80,15 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan queue, p_dmhandle, num_rows, 
num_cols, ld, dense_layout, val); \ } \ template <> \ - void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - layout dense_layout, FP_TYPE *val) { \ + layout dense_layout, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_matrix_usm##FP_SUFFIX( \ queue, p_dmhandle, num_rows, num_cols, ld, dense_layout, val); \ } \ template <> \ - void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ layout dense_layout, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ @@ -96,9 +96,9 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ } \ template <> \ - void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - layout dense_layout, FP_TYPE *val) { \ + layout dense_layout, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_matrix_data_usm##FP_SUFFIX( \ queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ @@ -106,8 +106,8 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); #undef DEFINE_DENSE_MATRIX_FUNCS -sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector &dependencies) { +sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); 
return function_tables[{ libkey, queue }].release_dense_matrix(queue, dmhandle, dependencies); } @@ -115,7 +115,7 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan // COO matrix #define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ template <> \ - void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ind, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -124,15 +124,15 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ind, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -141,9 +141,9 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, 
std::int64_t num_rows, \ + void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ @@ -154,7 +154,7 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); // CSR matrix #define DEFINE_INIT_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ template <> \ - void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ptr, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -163,15 +163,15 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ } \ template <> \ - void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ } \ template <> \ - void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, 
index_base index, \ sycl::buffer row_ptr, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -180,9 +180,9 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ } \ template <> \ - void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ @@ -191,43 +191,43 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_INIT_CSR_MATRIX_FUNCS); #undef DEFINE_INIT_CSR_MATRIX_FUNCS // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, - const std::vector &dependencies) { +sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_sparse_matrix(queue, smhandle, dependencies); } -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property) { +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].set_matrix_property(queue, smhandle, property); } // SPMM -void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr) { +void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spmm_descr(queue, p_spmm_descr); } -sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t 
spmm_descr, - const std::vector &dependencies) { +sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_spmm_descr(queue, spmm_descr, dependencies); } -void spmm_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, +void spmm_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmm_buffer_size(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, temp_buffer_size); } -void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +void spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmm_optimize_buffer(queue, opA, opB, alpha, A_view, @@ -235,23 +235,23 @@ void spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math spmm_descr, workspace); } -sycl::event spmm_optimize(sycl::queue &queue, oneapi::math::transpose opA, - oneapi::math::transpose opB, const 
void *alpha, matrix_view A_view, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::math::transpose opA, + oneapi::math::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies) { + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmm_optimize_usm( queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace, dependencies); } -sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +sycl::event spmm(sycl::queue& queue, oneapi::math::transpose opA, oneapi::math::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmm(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, @@ -259,78 +259,78 @@ sycl::event spmm(sycl::queue &queue, oneapi::math::transpose opA, oneapi::math:: } // SPMV -void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr) { +void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spmv_descr(queue, p_spmv_descr); } -sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies) { 
+sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_spmv_descr(queue, spmv_descr, dependencies); } -void spmv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, temp_buffer_size); } -void spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmv_optimize_buffer( queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace); } -sycl::event spmv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - void 
*workspace, const std::vector &dependencies) { + void* workspace, const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmv_optimize_usm( queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace, dependencies); } -sycl::event spmv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, const std::vector &dependencies) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmv(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); } // SPSV -void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr) { +void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spsv_descr(queue, p_spsv_descr); } -sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies) { +sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_spsv_descr(queue, spsv_descr, dependencies); } -void spsv_buffer_size(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size) { + std::size_t& 
temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spsv_buffer_size( queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, temp_buffer_size); } -void spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace) { @@ -339,21 +339,21 @@ void spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void * queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace); } -sycl::event spsv_optimize(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies) { + spsv_alg alg, spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spsv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace, dependencies); } -sycl::event spsv(sycl::queue &queue, oneapi::math::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::math::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spsv(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, 
dependencies); diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp index c83dedd5e..708921e9e 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha, +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha, int64_t batch_size) { // Prepare data. int64_t n, i; @@ -77,19 +77,19 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::axpy(&n_ref, (fp_ref *)&alpha, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::axpy(&n_ref, (fp_ref*)&alpha, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ AXPY_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -107,13 +107,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::axpy_batch(main_queue, n, alpha, x_buffer, incx, - stride_x, y_buffer, incy, stride_y, - batch_size); + stride_x, y_buffer, incy, stride_y, + batch_size); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::axpy_batch(main_queue, n, alpha, x_buffer, incx, - stride_x, y_buffer, incy, stride_y, - batch_size); + stride_x, y_buffer, incy, stride_y, + batch_size); break; default: break; } @@ -133,17 +133,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -160,7 +160,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f } class AxpyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchStrideTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp 
b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp index 5951bc0f5..fa5646ac9 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp @@ -43,20 +43,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha, +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, fp alpha, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -101,8 +101,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::axpy(&n_ref, (fp_ref *)&alpha, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::axpy(&n_ref, (fp_ref*)&alpha, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ AXPY_BATCH_STRIDE. 
@@ -117,8 +117,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::axpy_batch(main_queue, n, alpha, &x[0], incx, - stride_x, &y[0], incy, stride_y, - batch_size, dependencies); + stride_x, &y[0], incy, stride_y, + batch_size, dependencies); break; default: break; } @@ -140,17 +140,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -166,7 +166,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, f } class AxpyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchStrideUsmTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp index bbb62a19c..bc70aef9d 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -69,15 +69,15 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { std::vector dependencies; // Prepare data. - int64_t *n = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incx = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incy = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - fp *alpha = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt); - int64_t *group_size = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* n = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incx = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incy = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + fp* alpha = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt); + int64_t* group_size = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); if ((n == NULL) || (incx == NULL) || (incy == NULL) || (alpha == NULL) || (group_size == NULL)) { @@ -104,12 +104,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - fp **x_array = - (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_array = - (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_ref_array 
= - (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); + fp** x_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_ref_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -124,11 +124,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_size_x = (1 + (n[i] - 1) * std::abs(incx[i])); total_size_y = (1 + (n[i] - 1) * std::abs(incy[i])); x_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); y_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); y_ref_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); rand_vector(x_array[idx], n[i], incx[i]); rand_vector(y_array[idx], n[i], incy[i]); copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]); @@ -146,8 +146,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { n_ref = (int)n[i]; incx_ref = (int)incx[i]; incy_ref = (int)incy[i]; - ::axpy((const int *)&n_ref, (const fp_ref *)&alpha[i], (const fp_ref *)x_array[idx], - (const int *)&incx_ref, (fp_ref *)y_ref_array[idx], (const int *)&incy_ref); + ::axpy((const int*)&n_ref, (const fp_ref*)&alpha[i], (const fp_ref*)x_array[idx], + (const int*)&incx_ref, (fp_ref*)y_ref_array[idx], (const int*)&incy_ref); idx++; } } @@ -159,12 +159,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch 
(layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::axpy_batch( - main_queue, n, alpha, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::axpy_batch( - main_queue, n, alpha, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; default: break; @@ -174,12 +174,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy_batch, n, - alpha, (const fp **)x_array, incx, y_array, incy, + alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy_batch, n, - alpha, (const fp **)x_array, incx, y_array, incy, + alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; default: break; @@ -187,13 +187,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -214,7 +214,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution 
of AXPY_BATCH:\n" << error.what() << std::endl; } @@ -252,7 +252,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class AxpyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/copy_batch_stride.cpp b/tests/unit_tests/blas/batch/copy_batch_stride.cpp index a0d933d2d..48bec1291 100644 --- a/tests/unit_tests/blas/batch/copy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Prepare data. int64_t n, i; @@ -76,19 +76,19 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::copy(&n_ref, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::copy(&n_ref, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ COPY_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -105,12 +105,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::copy_batch(main_queue, n, x_buffer, incx, stride_x, - y_buffer, incy, stride_y, batch_size); + oneapi::math::blas::column_major::copy_batch( + main_queue, n, x_buffer, incx, stride_x, y_buffer, incy, stride_y, batch_size); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::copy_batch(main_queue, n, x_buffer, incx, stride_x, - y_buffer, incy, stride_y, batch_size); + y_buffer, incy, stride_y, batch_size); break; default: break; } @@ -130,17 +130,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of COPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -157,7 +157,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } class CopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), 
std::get<1>(GetParam()), 2, 3, 15)); diff --git a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp index e1502799f..291040948 100644 --- a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -100,8 +100,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::copy(&n_ref, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::copy(&n_ref, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ COPY_BATCH_STRIDE. 
@@ -111,13 +111,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::copy_batch(main_queue, n, &x[0], incx, - stride_x, &y[0], incy, stride_y, - batch_size, dependencies); + stride_x, &y[0], incy, stride_y, + batch_size, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::copy_batch(main_queue, n, &x[0], incx, - stride_x, &y[0], incy, stride_y, - batch_size, dependencies); + stride_x, &y[0], incy, stride_y, + batch_size, dependencies); break; default: break; } @@ -139,17 +139,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of COPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -165,7 +165,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } class CopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchStrideUsmTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/copy_batch_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_usm.cpp index ad0b5003d..69f93b1a7 100644 --- a/tests/unit_tests/blas/batch/copy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout 
layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -69,14 +69,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { std::vector dependencies; // Prepare data. - int64_t *n = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incx = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incy = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *group_size = - (int64_t *)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* n = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incx = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incy = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* group_size = + (int64_t*)oneapi::math::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); if ((n == NULL) || (incx == NULL) || (incy == NULL) || (group_size == NULL)) { std::cout << "Error cannot allocate input arrays\n"; @@ -100,12 +100,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - fp **x_array = - (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_array = - (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_ref_array = - 
(fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); + fp** x_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_ref_array = + (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -120,11 +120,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_size_x = (1 + (n[i] - 1) * std::abs(incx[i])); total_size_y = (1 + (n[i] - 1) * std::abs(incy[i])); x_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); y_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); y_ref_array[idx] = - (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); rand_vector(x_array[idx], n[i], incx[i]); rand_vector(y_array[idx], n[i], incy[i]); copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]); @@ -142,8 +142,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { n_ref = (int)n[i]; incx_ref = (int)incx[i]; incy_ref = (int)incy[i]; - ::copy((const int *)&n_ref, (const fp_ref *)x_array[idx], (const int *)&incx_ref, - (fp_ref *)y_ref_array[idx], (const int *)&incy_ref); + ::copy((const int*)&n_ref, (const fp_ref*)x_array[idx], (const int*)&incx_ref, + (fp_ref*)y_ref_array[idx], (const int*)&incy_ref); idx++; } } @@ -155,13 +155,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: done = 
oneapi::math::blas::column_major::copy_batch( - main_queue, n, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::copy_batch(main_queue, n, (const fp **)x_array, - incx, y_array, incy, group_count, - group_size, dependencies); + done = oneapi::math::blas::row_major::copy_batch(main_queue, n, (const fp**)x_array, + incx, y_array, incy, group_count, + group_size, dependencies); break; default: break; } @@ -170,12 +170,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::copy_batch, n, - (const fp **)x_array, incx, y_array, incy, group_count, + (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy_batch, n, - (const fp **)x_array, incx, y_array, incy, group_count, + (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; default: break; @@ -183,13 +183,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -209,7 +209,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution 
of COPY_BATCH:\n" << error.what() << std::endl; } @@ -246,7 +246,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class CopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp index 658c57aa6..9fa01c52a 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx, +int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx, int64_t batch_size) { // Prepare data. int64_t m, n; @@ -90,21 +90,20 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right for (i = 0; i < batch_size_ref; i++) { ::dgmm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), - (const int *)&m_ref, (const int *)&n_ref, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)(x.data() + stride_x * i), - (const int *)&incx_ref, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + (const int*)&m_ref, (const int*)&n_ref, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ DGMM_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -122,14 +121,14 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::dgmm_batch(main_queue, left_right, m, n, A_buffer, - lda, stride_a, x_buffer, incx, stride_x, - C_buffer, ldc, stride_c, batch_size); + oneapi::math::blas::column_major::dgmm_batch( + main_queue, left_right, m, n, A_buffer, lda, stride_a, x_buffer, incx, stride_x, + C_buffer, ldc, stride_c, batch_size); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::dgmm_batch(main_queue, left_right, m, n, A_buffer, - lda, stride_a, x_buffer, incx, stride_x, - C_buffer, ldc, stride_c, batch_size); + lda, stride_a, x_buffer, incx, stride_x, + C_buffer, ldc, stride_c, batch_size); break; default: break; } @@ -149,17 +148,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -177,7 +176,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right } class DgmmBatchStrideTests - : public ::testing::TestWithParam> 
{}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp index eca3eb6f1..c486ac90e 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp @@ -43,20 +43,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx, +int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right, int64_t incx, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -115,10 +115,9 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right for (i = 0; i < batch_size_ref; i++) { ::dgmm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), - (const int *)&m_ref, (const int *)&n_ref, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)(x.data() + stride_x * i), - (const int *)&incx_ref, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + (const int*)&m_ref, (const int*)&n_ref, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ DGMM_BATCH_STRIDE. 
@@ -156,17 +155,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -182,7 +181,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::side left_right } class DgmmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp index 01c39cad6..3df3bffd2 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -102,9 +102,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), x_array(uafpp), c_array(uafpp), - c_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), x_array(uafpp), c_array(uafpp), c_ref_array(uafpp); a_array.resize(total_batch_count); x_array.resize(total_batch_count); c_array.resize(total_batch_count); @@ -117,13 +116,15 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { size_x = 1 + (x_len - 1) * std::abs(incx[i]); size_c = (layout == oneapi::math::layout::col_major) ? 
ldc[i] * n[i] : ldc[i] * m[i]; for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - x_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); - c_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); - c_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); - rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], lda[i]); + a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + x_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); + c_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + c_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], + lda[i]); rand_vector(x_array[idx], x_len, incx[i]); - rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i]); + rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], + ldc[i]); copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i], c_ref_array[idx]); idx++; @@ -132,15 +133,15 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { // Call reference DGMM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *incx_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* incx_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_SIDE *left_right_ref = - (CBLAS_SIDE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); + CBLAS_SIDE* left_right_ref = + (CBLAS_SIDE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) || (ldc_ref == NULL) || (left_right_ref == NULL) || (group_size_ref == NULL)) { @@ -174,10 +175,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { ldc_ref[i] = (int)ldc[i]; group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { - ::dgmm(convert_to_cblas_layout(layout), left_right_ref[i], (const int *)&m_ref[i], - (const int *)&n_ref[i], (const fp_ref *)a_array[idx], (const int *)&lda_ref[i], - (const fp_ref *)x_array[idx], (const int *)&incx_ref[i], - (fp_ref *)c_ref_array[idx], (const int *)&ldc_ref[i]); + ::dgmm(convert_to_cblas_layout(layout), left_right_ref[i], (const 
int*)&m_ref[i], + (const int*)&n_ref[i], (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], + (const fp_ref*)x_array[idx], (const int*)&incx_ref[i], (fp_ref*)c_ref_array[idx], + (const int*)&ldc_ref[i]); idx++; } } @@ -189,14 +190,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::dgmm_batch( - main_queue, &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], &lda[0], - (const fp **)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, + main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0], + (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::dgmm_batch( - main_queue, &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], &lda[0], - (const fp **)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, + main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0], + (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -206,14 +207,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::dgmm_batch, - &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &c_array[0], + &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dgmm_batch, - &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &c_array[0], + &left_right[0], &m[0], 
&n[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -221,13 +222,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::aligned_free(m_ref); oneapi::math::aligned_free(n_ref); oneapi::math::aligned_free(lda_ref); @@ -248,7 +249,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH:\n" << error.what() << std::endl; } @@ -285,7 +286,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class DgmmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp index 999910829..50e90ccbb 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Prepare data. 
int64_t m, n, k; int64_t lda, ldb, ldc; @@ -135,23 +135,22 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { for (i = 0; i < batch_size_ref; i++) { ::gemm(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), - convert_to_cblas_trans(transb), (const int *)&m_ref, (const int *)&n_ref, - (const int *)&k_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A_ref.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(B_ref.data() + stride_b * i), (const int *)&ldb_ref, - (const fp_ref *)&beta, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + convert_to_cblas_trans(transb), (const int*)&m_ref, (const int*)&n_ref, + (const int*)&k_ref, (const fp_ref*)&alpha, + (const fp_ref*)(A_ref.data() + stride_a * i), (const int*)&lda_ref, + (const fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref, + (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ GEMM_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -199,17 +198,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { #endif main_queue.wait_and_throw(); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -232,7 +231,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class GemmBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchStrideTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp index d556b81e1..1f46e1d68 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -136,10 +136,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { C_ref.resize(stride_c * batch_size); C_cast_ref.resize(stride_c * batch_size); - Ta **a_array = (Ta **)oneapi::math::malloc_shared(64, sizeof(Ta *) * batch_size, *dev, cxt); - Tb **b_array = (Tb **)oneapi::math::malloc_shared(64, sizeof(Tb *) * batch_size, *dev, cxt); - Tc **c_array = (Tc **)oneapi::math::malloc_shared(64, sizeof(Tc *) * batch_size, *dev, cxt); - Ts **c_ref_array = (Ts **)oneapi::math::malloc_shared(64, sizeof(Ts *) * batch_size, *dev, cxt); + Ta** a_array = (Ta**)oneapi::math::malloc_shared(64, sizeof(Ta*) * batch_size, *dev, cxt); + Tb** b_array = (Tb**)oneapi::math::malloc_shared(64, sizeof(Tb*) * batch_size, *dev, cxt); + Tc** c_array = (Tc**)oneapi::math::malloc_shared(64, sizeof(Tc*) * batch_size, *dev, cxt); + Ts** c_ref_array = (Ts**)oneapi::math::malloc_shared(64, sizeof(Ts*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -181,12 +181,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { ::gemm(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), - convert_to_cblas_trans(transb), (const int *)&m_ref, (const int *)&n_ref, - (const int *)&k_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A_ref.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(B_ref.data() + stride_b * i), (const int *)&ldb_ref, - (const fp_ref 
*)&beta, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + convert_to_cblas_trans(transb), (const int*)&m_ref, (const int*)&n_ref, + (const int*)&k_ref, (const fp_ref*)&alpha, + (const fp_ref*)(A_ref.data() + stride_a * i), (const int*)&lda_ref, + (const fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref, + (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ GEMM_BATCH_STRIDE. @@ -226,13 +225,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { main_queue.wait_and_throw(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::free_shared(a_array, cxt); oneapi::math::free_shared(b_array, cxt); oneapi::math::free_shared(c_array, cxt); @@ -240,7 +239,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -267,7 +266,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class GemmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchStrideUsmTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp index c02279b53..8c4fd6a37 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device 
*dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -126,14 +126,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uaTap = usm_allocator(cxt, *dev); - auto uaTbp = usm_allocator(cxt, *dev); - auto uaTcp = usm_allocator(cxt, *dev); - auto uaTsp = usm_allocator(cxt, *dev); - vector a_array(uaTap); - vector b_array(uaTbp); - vector c_array(uaTcp), c_cast_ref_array(uaTcp); - vector a_ref_array(uaTsp), b_ref_array(uaTsp), c_ref_array(uaTsp); + auto uaTap = usm_allocator(cxt, *dev); + auto uaTbp = usm_allocator(cxt, *dev); + auto uaTcp = usm_allocator(cxt, *dev); + auto uaTsp = usm_allocator(cxt, *dev); + vector a_array(uaTap); + vector b_array(uaTbp); + vector c_array(uaTcp), c_cast_ref_array(uaTcp); + vector a_ref_array(uaTsp), b_ref_array(uaTsp), c_ref_array(uaTsp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); c_array.resize(total_batch_count); @@ -158,17 +158,18 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (Ta *)oneapi::math::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt); - b_array[idx] = (Tb *)oneapi::math::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt); - c_array[idx] = (Tc *)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); - a_ref_array[idx] = (Ts *)oneapi::math::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt); - b_ref_array[idx] = (Ts 
*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_b, *dev, cxt); + a_array[idx] = (Ta*)oneapi::math::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt); + b_array[idx] = (Tb*)oneapi::math::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt); + c_array[idx] = (Tc*)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); + a_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt); + b_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_b, *dev, cxt); c_cast_ref_array[idx] = - (Tc *)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); - c_ref_array[idx] = (Ts *)oneapi::math::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt); + (Tc*)oneapi::math::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); + c_ref_array[idx] = (Ts*)oneapi::math::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt); rand_matrix(a_array[idx], layout, transa[i], m[i], k[i], lda[i]); rand_matrix(b_array[idx], layout, transb[i], k[i], n[i], ldb[i]); - rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i]); + rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], + ldc[i]); copy_matrix(a_array[idx], layout, transa[i], m[i], k[i], lda[i], a_ref_array[idx]); copy_matrix(b_array[idx], layout, transb[i], k[i], n[i], ldb[i], b_ref_array[idx]); copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldc[i], @@ -179,18 +180,18 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { // Call reference GEMM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *k_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *ldb_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - - CBLAS_TRANSPOSE *transa_ref = - (CBLAS_TRANSPOSE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); - CBLAS_TRANSPOSE *transb_ref = - (CBLAS_TRANSPOSE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* k_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* ldb_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + + CBLAS_TRANSPOSE* transa_ref = + (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_TRANSPOSE* transb_ref = + (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) || (ldb_ref == NULL) || (ldc_ref == NULL) || (transa_ref == NULL) || (transb_ref == NULL) || @@ -233,11 +234,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { 
group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::gemm(convert_to_cblas_layout(layout), transa_ref[i], transb_ref[i], - (const int *)&m_ref[i], (const int *)&n_ref[i], (const int *)&k_ref[i], - (const fp_ref *)&alpha[i], (const fp_ref *)a_ref_array[idx], - (const int *)&lda_ref[i], (const fp_ref *)b_ref_array[idx], - (const int *)&ldb_ref[i], (const fp_ref *)&beta[i], (fp_ref *)c_ref_array[idx], - (const int *)&ldc_ref[i]); + (const int*)&m_ref[i], (const int*)&n_ref[i], (const int*)&k_ref[i], + (const fp_ref*)&alpha[i], (const fp_ref*)a_ref_array[idx], + (const int*)&lda_ref[i], (const fp_ref*)b_ref_array[idx], + (const int*)&ldb_ref[i], (const fp_ref*)&beta[i], (fp_ref*)c_ref_array[idx], + (const int*)&ldc_ref[i]); idx++; } } @@ -250,13 +251,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gemm_batch( main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb **)&b_array[0], &ldb[0], &beta[0], + (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::gemm_batch( main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb **)&b_array[0], &ldb[0], &beta[0], + (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -267,14 +268,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemm_batch, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb 
**)&b_array[0], + (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_batch, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Ta **)&b_array[0], + (const Ta**)&a_array[0], &lda[0], (const Ta**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; @@ -283,13 +284,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait_and_throw(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::aligned_free(m_ref); oneapi::math::aligned_free(n_ref); oneapi::math::aligned_free(k_ref); @@ -315,7 +316,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH:\n" << error.what() << std::endl; } @@ -364,7 +365,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class GemmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchUsmTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp index 8d4780a00..bed54f5e7 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; 
+extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda; @@ -103,23 +103,22 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int *)&m_ref, - (const int *)&n_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(x.data() + stride_x * i), (const int *)&incx_ref, - (const fp_ref *)&beta, (fp_ref *)(y_ref.data() + stride_y * i), - (const int *)&incy_ref); + ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int*)&m_ref, + (const int*)&n_ref, (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (const fp_ref*)&beta, (fp_ref*)(y_ref.data() + stride_y * i), + (const int*)&incy_ref); } // Call DPC++ GEMV_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -164,17 +163,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -192,7 +191,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } class GemvBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5)); diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp index 39eae0399..e009f1758 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -128,10 +128,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int *)&m_ref, - (const int *)&n_ref, (const fp_ref *)&alpha, (const fp_ref *)&A[stride_a * i], - (const int *)&lda_ref, (const fp_ref *)&x[stride_x * i], (const int *)&incx_ref, - (const fp_ref *)&beta, (fp_ref *)&y_ref[stride_y * i], (const int *)&incy_ref); + ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int*)&m_ref, + (const int*)&n_ref, (const fp_ref*)&alpha, (const fp_ref*)&A[stride_a * i], + (const int*)&lda_ref, (const fp_ref*)&x[stride_x * i], (const int*)&incx_ref, + (const fp_ref*)&beta, (fp_ref*)&y_ref[stride_y * i], (const int*)&incy_ref); } // Call DPC++ GEMV_BATCH_STRIDE. 
@@ -171,17 +171,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -197,7 +197,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t incx, int64_t incy, i } class GemvBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5)); diff --git a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp index 40fffe810..71925b26a 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -119,9 +119,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), x_array(uafpp), y_array(uafpp), - y_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), x_array(uafpp), y_array(uafpp), y_ref_array(uafpp); a_array.resize(total_batch_count); x_array.resize(total_batch_count); y_array.resize(total_batch_count); @@ -135,11 +134,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { size_x = 1 + (x_len - 1) * std::abs(incx[i]); size_y = 1 + (y_len - 1) * std::abs(incy[i]); for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - x_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); - y_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); - y_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); - rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], lda[i]); + a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + x_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); + y_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); + y_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); + rand_matrix(a_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], + lda[i]); rand_vector(x_array[idx], 
x_len, incx[i]); rand_vector(y_array[idx], y_len, incy[i]); copy_vector(y_array[idx], y_len, incy[i], y_ref_array[idx]); @@ -149,15 +149,15 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { // Call reference GEMV_BATCH. using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *incx_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *incy_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* incx_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* incy_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_TRANSPOSE *transa_ref = - (CBLAS_TRANSPOSE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_TRANSPOSE* transa_ref = + (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) || (incy_ref == NULL) || (transa_ref == NULL) || (group_size_ref == NULL)) { @@ -191,11 +191,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { incy_ref[i] = (int)incy[i]; group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { - ::gemv(convert_to_cblas_layout(layout), transa_ref[i], (const int *)&m_ref[i], 
- (const int *)&n_ref[i], (const fp_ref *)&alpha[i], (const fp_ref *)a_array[idx], - (const int *)&lda_ref[i], (const fp_ref *)x_array[idx], - (const int *)&incx_ref[i], (const fp_ref *)&beta[i], (fp_ref *)y_ref_array[idx], - (const int *)&incy_ref[i]); + ::gemv(convert_to_cblas_layout(layout), transa_ref[i], (const int*)&m_ref[i], + (const int*)&n_ref[i], (const fp_ref*)&alpha[i], (const fp_ref*)a_array[idx], + (const int*)&lda_ref[i], (const fp_ref*)x_array[idx], (const int*)&incx_ref[i], + (const fp_ref*)&beta[i], (fp_ref*)y_ref_array[idx], (const int*)&incy_ref[i]); idx++; } } @@ -207,14 +206,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gemv_batch( - main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], + main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::gemv_batch( - main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], + main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; default: break; @@ -225,29 +224,28 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::gemv_batch, &transa[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], (const fp **)&x_array[0], + (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0], 
&incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemv_batch, - &transa[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], (const fp **)&x_array[0], - &incx[0], &beta[0], &y_array[0], &incy[0], group_count, - &group_size[0], dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::row_major::gemv_batch, &transa[0], &m[0], &n[0], + &alpha[0], (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0], &incx[0], + &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::aligned_free(m_ref); oneapi::math::aligned_free(n_ref); oneapi::math::aligned_free(lda_ref); @@ -268,7 +266,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH:\n" << error.what() << std::endl; } @@ -306,7 +304,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class GemvBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp index c28a4764e..7bb36b1ce 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp +++ 
b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -101,11 +101,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -133,9 +133,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::imatcopy_batch, - trans, m, n, alpha, AB_buffer, lda, ldb, stride, - batch_size); + TEST_RUN_BLAS_CT_SELECT(main_queue, + oneapi::math::blas::column_major::imatcopy_batch, trans, m, + n, alpha, AB_buffer, lda, ldb, stride, batch_size); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch, @@ -146,17 +146,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& 
error) { std::cout << "Error raised during execution of IMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -171,7 +171,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class ImatcopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp index 5abb649fb..e0bd7ead0 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { AB.resize(stride * batch_size); AB_ref.resize(stride * batch_size); - fp **ab_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **ab_ref_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** ab_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** ab_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((ab_array == NULL) || (ab_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; oneapi::math::free_shared(ab_array, cxt); @@ -143,8 +143,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::imatcopy_batch(main_queue, trans, m, n, alpha, - &AB[0], lda, ldb, stride, - batch_size, dependencies); + &AB[0], lda, ldb, stride, + batch_size, dependencies); break; default: break; } @@ -152,9 +152,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::imatcopy_batch, - trans, m, n, alpha, &AB[0], lda, ldb, stride, batch_size, - dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::column_major::imatcopy_batch, trans, m, n, + alpha, &AB[0], lda, ldb, stride, batch_size, dependencies); break; case oneapi::math::layout::row_major: 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch, @@ -166,19 +166,19 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::free_shared(ab_array, cxt); oneapi::math::free_shared(ab_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -194,7 +194,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class ImatcopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp index 25dd4cd3f..5c2a38ada 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector ab_array(uafpp), ab_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector ab_array(uafpp), ab_ref_array(uafpp); ab_array.resize(total_batch_count); ab_ref_array.resize(total_batch_count); @@ -126,8 +126,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } size = std::max(size_a, size_b); for (j = 0; j < group_size[i]; j++) { - ab_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt); - ab_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt); + ab_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt); + ab_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size, *dev, cxt); rand_matrix(ab_array[idx], oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, size); copy_matrix(ab_array[idx], oneapi::math::layout::col_major, @@ -171,10 +171,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::imatcopy_batch, - trans.data(), m.data(), n.data(), alpha.data(), - ab_array.data(), lda.data(), ldb.data(), group_count, - group_size.data(), dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::column_major::imatcopy_batch, trans.data(), + m.data(), n.data(), alpha.data(), ab_array.data(), 
lda.data(), ldb.data(), + group_count, group_size.data(), dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::imatcopy_batch, @@ -187,13 +187,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -205,7 +205,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY_BATCH:\n" << error.what() << std::endl; } @@ -229,9 +229,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } size = std::max(size_a, size_b); for (j = 0; j < group_size[i]; j++) { - good = good && - check_equal_matrix(ab_array[idx], ab_ref_array[idx], - oneapi::math::layout::col_major, size, 1, size, 10, std::cout); + good = good && check_equal_matrix(ab_array[idx], ab_ref_array[idx], + oneapi::math::layout::col_major, size, 1, size, 10, + std::cout); idx++; } } @@ -249,7 +249,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class ImatcopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp index ddb37e883..2da2de4ab 100644 --- a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp 
+++ b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb, ldc; @@ -111,11 +111,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -162,17 +162,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -187,7 +187,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class OmataddBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp index 
b01c2ec10..2f0deb8b4 100644 --- a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -110,10 +110,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { C.resize(stride_c * batch_size); C_ref.resize(stride_c * batch_size); - fp **a_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_ref_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -188,13 +188,13 @@ int test(device *dev, 
oneapi::math::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::free_shared(a_array, cxt); oneapi::math::free_shared(b_array, cxt); oneapi::math::free_shared(c_array, cxt); @@ -202,7 +202,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -220,7 +220,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class OmataddBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp index 20320d51d..9ef8cbdb6 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -101,11 +101,11 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -123,22 +123,22 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::omatcopy_batch(main_queue, trans, m, n, alpha, - A_buffer, lda, stride_a, B_buffer, - ldb, stride_b, batch_size); + A_buffer, lda, stride_a, B_buffer, + ldb, stride_b, batch_size); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::omatcopy_batch(main_queue, trans, m, n, alpha, - A_buffer, lda, stride_a, B_buffer, ldb, - stride_b, batch_size); + A_buffer, lda, stride_a, B_buffer, + ldb, stride_b, batch_size); break; default: break; } #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy_batch, - trans, m, n, alpha, A_buffer, lda, stride_a, B_buffer, ldb, - stride_b, batch_size); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans, m, n, + alpha, A_buffer, lda, stride_a, B_buffer, ldb, stride_b, batch_size); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch, @@ -149,17 +149,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - 
catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -174,7 +174,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class OmatcopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp index 72b7c8f1b..321c9b3bb 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -105,9 +105,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { B.resize(stride_b * batch_size); B_ref.resize(stride_b * batch_size); - fp **a_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_ref_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (b_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -161,9 +161,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy_batch, - trans, m, n, alpha, &A[0], lda, stride_a, &B[0], ldb, - stride_b, batch_size, dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans, m, n, + alpha, &A[0], lda, stride_a, &B[0], ldb, stride_b, batch_size, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch, @@ -175,20 +175,20 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { 
main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::free_shared(a_array, cxt); oneapi::math::free_shared(b_array, cxt); oneapi::math::free_shared(b_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -205,7 +205,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class OmatcopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp index 190c38afe..cc8be6fe8 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); @@ -126,9 +126,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - b_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); - b_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + b_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + b_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); rand_matrix(a_array[idx], oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_a, 1, size_a); rand_matrix(b_array[idx], oneapi::math::layout::col_major, @@ -161,14 +161,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::omatcopy_batch( main_queue, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), ldb.data(), - group_count, group_size.data(), 
dependencies); + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, + group_size.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::omatcopy_batch( main_queue, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), ldb.data(), - group_count, group_size.data(), dependencies); + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, + group_size.data(), dependencies); break; default: break; } @@ -176,15 +176,15 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::omatcopy_batch, - trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), - ldb.data(), group_count, group_size.data(), dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::math::blas::column_major::omatcopy_batch, trans.data(), + m.data(), n.data(), alpha.data(), (const fp**)a_array.data(), lda.data(), + b_array.data(), ldb.data(), group_count, group_size.data(), dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::omatcopy_batch, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, group_size.data(), dependencies); break; default: break; @@ -192,13 +192,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const 
oneapi::math::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -211,7 +211,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH:\n" << error.what() << std::endl; } @@ -235,8 +235,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } for (j = 0; j < group_size[i]; j++) { good = good && check_equal_matrix(b_array[idx], b_ref_array[idx], - oneapi::math::layout::col_major, size_b, 1, size_b, 10, - std::cout); + oneapi::math::layout::col_major, size_b, 1, size_b, + 10, std::cout); idx++; } } @@ -255,7 +255,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class OmatcopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp index 3599d36f8..8470a9139 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Prepare data. int64_t n, k; int64_t lda, ldc; @@ -67,9 +67,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { upper_lower = (oneapi::math::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans = (std::rand() % 2) == 0 ? 
oneapi::math::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans - : oneapi::math::transpose::conjtrans; + trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans + : oneapi::math::transpose::conjtrans; } else { trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans @@ -110,21 +110,21 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { for (i = 0; i < batch_size_ref; i++) { ::syrk(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), - convert_to_cblas_trans(trans), (const int *)&n_ref, (const int *)&k_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)&beta, - (fp_ref *)(C_ref.data() + stride_c * i), (const int *)&ldc_ref); + convert_to_cblas_trans(trans), (const int*)&n_ref, (const int*)&k_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), + (const int*)&ldc_ref); } // Call DPC++ SYRK_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -142,13 +142,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::syrk_batch(main_queue, upper_lower, trans, n, k, - alpha, A_buffer, lda, stride_a, beta, - C_buffer, ldc, stride_c, batch_size); + alpha, A_buffer, lda, stride_a, beta, + C_buffer, ldc, stride_c, batch_size); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::syrk_batch(main_queue, upper_lower, trans, n, k, - alpha, A_buffer, lda, stride_a, beta, - C_buffer, ldc, stride_c, batch_size); + alpha, A_buffer, lda, stride_a, beta, + C_buffer, ldc, stride_c, batch_size); break; default: break; } @@ -168,17 +168,17 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -187,14 +187,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { auto C_accessor = C_buffer.get_host_access(read_only); bool good = - check_equal_matrix(C_accessor, C_ref, oneapi::math::layout::col_major, stride_c * batch_size, - 1, stride_c * batch_size, 10 * k, std::cout); + 
check_equal_matrix(C_accessor, C_ref, oneapi::math::layout::col_major, + stride_c * batch_size, 1, stride_c * batch_size, 10 * k, std::cout); return (int)good; } class SyrkBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp index 06aea5c4f..4736ae1a2 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::math::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -86,9 +86,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { beta = rand_scalar(); upper_lower = (oneapi::math::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans - : oneapi::math::transpose::conjtrans; + trans = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans + : oneapi::math::transpose::conjtrans; } else { trans = (std::rand() % 2) == 0 ? 
oneapi::math::transpose::nontrans @@ -116,9 +116,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { C.resize(stride_c * batch_size); C_ref.resize(stride_c * batch_size); - fp **a_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_ref_array = (fp **)oneapi::math::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_ref_array = (fp**)oneapi::math::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -150,10 +150,10 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { ::syrk(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), - convert_to_cblas_trans(trans), (const int *)&n_ref, (const int *)&k_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)&beta, - (fp_ref *)(C_ref.data() + stride_c * i), (const int *)&ldc_ref); + convert_to_cblas_trans(trans), (const int*)&n_ref, (const int*)&k_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), + (const int*)&ldc_ref); } // Call DPC++ SYRK_BATCH_STRIDE. 
@@ -191,20 +191,20 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::free_shared(a_array, cxt); oneapi::math::free_shared(c_array, cxt); oneapi::math::free_shared(c_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -221,7 +221,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t batch_size) { } class SyrkBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp index bf7358ca9..ef117619b 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -106,10 +106,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { beta[i] = rand_scalar(); upper_lower[i] = (oneapi::math::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans[i] = (std::rand() % 2) == 0 - ? oneapi::math::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans - : oneapi::math::transpose::conjtrans; + trans[i] = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::math::transpose::trans + : oneapi::math::transpose::conjtrans; } else { trans[i] = (std::rand() % 2) == 0 ? oneapi::math::transpose::nontrans @@ -118,8 +117,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), c_array(uafpp), c_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), c_array(uafpp), c_ref_array(uafpp); a_array.resize(total_batch_count); c_array.resize(total_batch_count); c_ref_array.resize(total_batch_count); @@ -138,11 +137,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - c_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); - c_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + c_array[idx] = 
(fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + c_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); rand_matrix(a_array[idx], layout, trans[i], n[i], k[i], lda[i]); - rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, n[i], n[i], ldc[i]); + rand_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, n[i], n[i], + ldc[i]); copy_matrix(c_array[idx], layout, oneapi::math::transpose::nontrans, n[i], n[i], ldc[i], c_ref_array[idx]); idx++; @@ -151,16 +151,16 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { // Call reference SYRK_BATCH. using fp_ref = typename ref_type_info::type; - int *n_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *k_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* k_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_UPLO *upper_lower_ref = - (CBLAS_UPLO *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); - CBLAS_TRANSPOSE *trans_ref = - (CBLAS_TRANSPOSE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_UPLO* upper_lower_ref = + (CBLAS_UPLO*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); + CBLAS_TRANSPOSE* trans_ref = + (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) 
* group_count); if ((n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) || (ldc_ref == NULL) || (trans_ref == NULL) || (upper_lower_ref == NULL) || (group_size_ref == NULL)) { @@ -194,9 +194,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::syrk(convert_to_cblas_layout(layout), upper_lower_ref[i], trans_ref[i], - (const int *)&n_ref[i], (const int *)&k_ref[i], (const fp_ref *)&alpha[i], - (const fp_ref *)a_array[idx], (const int *)&lda_ref[i], (const fp_ref *)&beta[i], - (fp_ref *)c_ref_array[idx], (const int *)&ldc_ref[i]); + (const int*)&n_ref[i], (const int*)&k_ref[i], (const fp_ref*)&alpha[i], + (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], (const fp_ref*)&beta[i], + (fp_ref*)c_ref_array[idx], (const int*)&ldc_ref[i]); idx++; } } @@ -209,13 +209,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::syrk_batch( main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::syrk_batch( main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -226,13 +226,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::syrk_batch, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], 
&beta[0], &c_array[0], + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk_batch, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -240,13 +240,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::aligned_free(n_ref); oneapi::math::aligned_free(k_ref); oneapi::math::aligned_free(lda_ref); @@ -266,7 +266,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH:\n" << error.what() << std::endl; } @@ -301,7 +301,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class SyrkBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp index 5ae26282d..37fd56886 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; 
+extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -116,21 +116,20 @@ int test(device *dev, oneapi::math::layout layout) { for (i = 0; i < batch_size_ref; i++) { ::trsm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - convert_to_cblas_diag(unit_nonunit), (const int *)&m_ref, (const int *)&n_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (fp_ref *)(B_ref.data() + stride_b * i), - (const int *)&ldb_ref); + convert_to_cblas_diag(unit_nonunit), (const int*)&m_ref, (const int*)&n_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref); } // Call DPC++ TRSM_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -176,17 +175,17 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -201,7 +200,7 @@ int test(device *dev, oneapi::math::layout layout) { } class TrsmBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(TrsmBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp index ec0f17572..0bc331cd5 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -137,10 +137,9 @@ int test(device *dev, oneapi::math::layout layout) { for (i = 0; i < batch_size_ref; i++) { ::trsm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - convert_to_cblas_diag(unit_nonunit), (const int *)&m_ref, (const int *)&n_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (fp_ref *)(B_ref.data() + stride_b * i), - (const int *)&ldb_ref); + convert_to_cblas_diag(unit_nonunit), (const int*)&m_ref, (const int*)&n_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref); } // Call DPC++ TRSM_BATCH_STRIDE. 
@@ -180,17 +179,17 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -203,7 +202,7 @@ int test(device *dev, oneapi::math::layout layout) { } class TrsmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(TrsmBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp index 8caeb9f14..cb4f06bbc 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int64_t group_count) { +int test(device* dev, oneapi::math::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -128,8 +128,8 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); @@ -141,11 +141,12 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { Arank = left_right[i] == oneapi::math::side::left ? m[i] : n[i]; size_b = ldb[i] * ((layout == oneapi::math::layout::col_major) ? 
n[i] : m[i]); for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - b_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); - b_ref_array[idx] = (fp *)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + a_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + b_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + b_ref_array[idx] = (fp*)oneapi::math::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); rand_trsm_matrix(a_array[idx], layout, trans[i], Arank, Arank, lda[i]); - rand_matrix(b_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldb[i]); + rand_matrix(b_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], + ldb[i]); copy_matrix(b_array[idx], layout, oneapi::math::transpose::nontrans, m[i], n[i], ldb[i], b_ref_array[idx]); idx++; @@ -154,20 +155,20 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { // Call reference TRSM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *ldb_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); - - CBLAS_TRANSPOSE *trans_ref = - (CBLAS_TRANSPOSE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); - CBLAS_SIDE *left_right_ref = - (CBLAS_SIDE *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); - CBLAS_UPLO *upper_lower_ref = - (CBLAS_UPLO *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); - CBLAS_DIAG *unit_nonunit_ref = - (CBLAS_DIAG *)oneapi::math::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count); + int* m_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* ldb_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::math::aligned_alloc(64, sizeof(int) * group_count); + + CBLAS_TRANSPOSE* trans_ref = + (CBLAS_TRANSPOSE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_SIDE* left_right_ref = + (CBLAS_SIDE*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); + CBLAS_UPLO* upper_lower_ref = + (CBLAS_UPLO*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); + CBLAS_DIAG* unit_nonunit_ref = + (CBLAS_DIAG*)oneapi::math::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (ldb_ref == NULL) || (trans_ref == NULL) || (left_right_ref == NULL) || (upper_lower_ref == NULL) || @@ -206,9 
+207,9 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::trsm(convert_to_cblas_layout(layout), left_right_ref[i], upper_lower_ref[i], - trans_ref[i], unit_nonunit_ref[i], (const int *)&m_ref[i], - (const int *)&n_ref[i], (const fp_ref *)&alpha[i], (const fp_ref *)a_array[idx], - (const int *)&lda_ref[i], b_ref_array[idx], (const int *)&ldb_ref[i]); + trans_ref[i], unit_nonunit_ref[i], (const int*)&m_ref[i], (const int*)&n_ref[i], + (const fp_ref*)&alpha[i], (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], + b_ref_array[idx], (const int*)&ldb_ref[i]); idx++; } } @@ -221,13 +222,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::trsm_batch( main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], - &n[0], &alpha[0], (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::trsm_batch( main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], - &n[0], &alpha[0], (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; default: break; @@ -239,14 +240,14 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::trsm_batch, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; case 
oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsm_batch, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; default: break; @@ -254,13 +255,13 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { oneapi::math::aligned_free(m_ref); oneapi::math::aligned_free(n_ref); oneapi::math::aligned_free(lda_ref); @@ -282,7 +283,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH:\n" << error.what() << std::endl; } @@ -319,7 +320,7 @@ int test(device *dev, oneapi::math::layout layout, int64_t group_count) { } class TrsmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(TrsmBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/extensions/gemm_bias.cpp b/tests/unit_tests/blas/extensions/gemm_bias.cpp index 8ef864af7..012208587 100644 --- a/tests/unit_tests/blas/extensions/gemm_bias.cpp +++ b/tests/unit_tests/blas/extensions/gemm_bias.cpp @@ -63,11 +63,14 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans rand_matrix(B, layout, transb, k, n, ldb); rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc); if (offsetc == 
oneapi::math::offset::fix) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1, 1); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1, + 1); if (offsetc == oneapi::math::offset::column) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1, m); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1, + m); if (offsetc == oneapi::math::offset::row) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1, n); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1, + n); C_ref = C; @@ -113,14 +116,14 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::gemm_bias(main_queue, transa, transb, offsetc, m, - n, k, alpha, A_buffer, lda, ao, B_buffer, - ldb, bo, beta, C_buffer, ldc, CO_buffer); + oneapi::math::blas::column_major::gemm_bias( + main_queue, transa, transb, offsetc, m, n, k, alpha, A_buffer, lda, ao, + B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gemm_bias(main_queue, transa, transb, offsetc, m, n, - k, alpha, A_buffer, lda, ao, B_buffer, ldb, - bo, beta, C_buffer, ldc, CO_buffer); + k, alpha, A_buffer, lda, ao, B_buffer, ldb, + bo, beta, C_buffer, ldc, CO_buffer); break; default: break; } @@ -132,8 +135,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias, transa, - transb, offsetc, m, n, k, alpha, A_buffer, lda, ao, + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias, + transa, transb, 
offsetc, m, n, k, alpha, A_buffer, lda, ao, B_buffer, ldb, bo, beta, C_buffer, ldc, CO_buffer); break; default: break; diff --git a/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp b/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp index 19f68dc66..4c10aef2d 100644 --- a/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp +++ b/tests/unit_tests/blas/extensions/gemm_bias_usm.cpp @@ -85,11 +85,14 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans rand_matrix(B, layout, transb, k, n, ldb); rand_matrix(C, layout, oneapi::math::transpose::nontrans, m, n, ldc); if (offsetc == oneapi::math::offset::fix) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1, 1); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, 1, 1, + 1); if (offsetc == oneapi::math::offset::column) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1, m); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, m, 1, + m); if (offsetc == oneapi::math::offset::row) - rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1, n); + rand_matrix(co, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, n, 1, + n); C_ref.resize(C.size()); for (int i = 0; i < C.size(); i++) @@ -137,8 +140,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias, transa, - transb, offsetc, m, n, k, alpha, A.data(), lda, ao, + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gemm_bias, + transa, transb, offsetc, m, n, k, alpha, A.data(), lda, ao, B.data(), ldb, bo, beta, C.data(), ldc, co.data(), dependencies); break; diff --git a/tests/unit_tests/blas/extensions/gemmt.cpp b/tests/unit_tests/blas/extensions/gemmt.cpp index 
fdcec77e6..f1fe54791 100644 --- a/tests/unit_tests/blas/extensions/gemmt.cpp +++ b/tests/unit_tests/blas/extensions/gemmt.cpp @@ -96,13 +96,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::gemmt(main_queue, upper_lower, transa, transb, n, - k, alpha, A_buffer, lda, B_buffer, ldb, beta, - C_buffer, ldc); + k, alpha, A_buffer, lda, B_buffer, ldb, + beta, C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gemmt(main_queue, upper_lower, transa, transb, n, k, - alpha, A_buffer, lda, B_buffer, ldb, beta, - C_buffer, ldc); + alpha, A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; default: break; } @@ -143,8 +143,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class GemmtTests : public ::testing::TestWithParam> { -}; +class GemmtTests + : public ::testing::TestWithParam> {}; TEST_P(GemmtTests, RealSinglePrecision) { float alpha(2.0); @@ -239,28 +239,28 @@ TEST_P(GemmtTests, ComplexSinglePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, 
alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, @@ -275,28 +275,28 @@ TEST_P(GemmtTests, ComplexSinglePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, 
beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); } TEST_P(GemmtTests, ComplexDoublePrecision) { @@ -318,28 +318,28 @@ TEST_P(GemmtTests, ComplexDoublePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, 
oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, @@ -354,28 +354,28 @@ TEST_P(GemmtTests, ComplexDoublePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, 
oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(GemmtTestSuite, GemmtTests, diff --git a/tests/unit_tests/blas/extensions/gemmt_usm.cpp b/tests/unit_tests/blas/extensions/gemmt_usm.cpp index 8a5db1e15..3674fe634 100644 --- a/tests/unit_tests/blas/extensions/gemmt_usm.cpp +++ b/tests/unit_tests/blas/extensions/gemmt_usm.cpp @@ -101,8 +101,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::gemmt(main_queue, upper_lower, transa, transb, - n, k, alpha, A.data(), lda, B.data(), - ldb, beta, C.data(), ldc, dependencies); + n, k, alpha, A.data(), lda, B.data(), + ldb, beta, C.data(), ldc, dependencies); break; default: break; } @@ -239,28 +239,28 @@ TEST_P(GemmtUsmTests, ComplexSinglePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, 
oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, @@ -275,28 +275,28 @@ TEST_P(GemmtUsmTests, ComplexSinglePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, 
oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); } TEST_P(GemmtUsmTests, ComplexDoublePrecision) { @@ -318,28 +318,28 @@ TEST_P(GemmtUsmTests, ComplexDoublePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 
27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, @@ -354,28 +354,28 @@ TEST_P(GemmtUsmTests, ComplexDoublePrecision) { alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, - beta)); + oneapi::math::transpose::trans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, + alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, 27, 
98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, 27, 98, 101, 102, + 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::transpose::trans, 27, 98, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, 103, - alpha, beta)); + oneapi::math::transpose::conjtrans, oneapi::math::transpose::conjtrans, 27, 98, 101, 102, + 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(GemmtUsmTestSuite, GemmtUsmTests, diff --git a/tests/unit_tests/blas/extensions/imatcopy.cpp b/tests/unit_tests/blas/extensions/imatcopy.cpp index 46335f012..f6480ee16 100644 --- a/tests/unit_tests/blas/extensions/imatcopy.cpp +++ b/tests/unit_tests/blas/extensions/imatcopy.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. 
int64_t m, n; int64_t lda, ldb; @@ -81,8 +81,8 @@ int test(device *dev, oneapi::math::layout layout) { rand_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, size); - copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, size, - AB_ref); + copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, + size, AB_ref); // Call reference IMATCOPY. int m_ref = (int)m; @@ -95,11 +95,11 @@ int test(device *dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -115,12 +115,12 @@ int test(device *dev, oneapi::math::layout layout) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::imatcopy(main_queue, trans, m, n, alpha, AB_buffer, - lda, ldb); + oneapi::math::blas::column_major::imatcopy(main_queue, trans, m, n, alpha, + AB_buffer, lda, ldb); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::imatcopy(main_queue, trans, m, n, alpha, AB_buffer, - lda, ldb); + lda, ldb); break; default: break; } @@ -138,17 +138,17 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during 
execution of IMATCOPY:\n" << error.what() << std::endl; } @@ -162,7 +162,7 @@ int test(device *dev, oneapi::math::layout layout) { } class ImatcopyTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp index c247cd699..dd98e97fa 100644 --- a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp +++ b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -105,8 +105,8 @@ int test(device *dev, oneapi::math::layout layout) { rand_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, size); - copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, size, - AB_ref); + copy_matrix(AB, oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size, 1, + size, AB_ref); // Call reference IMATCOPY. 
int m_ref = (int)m; @@ -121,11 +121,11 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::imatcopy(main_queue, trans, m, n, alpha, - &AB[0], lda, ldb, dependencies); + &AB[0], lda, ldb, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::imatcopy(main_queue, trans, m, n, alpha, - &AB[0], lda, ldb, dependencies); + &AB[0], lda, ldb, dependencies); break; default: break; } @@ -145,17 +145,17 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY:\n" << error.what() << std::endl; } @@ -167,7 +167,7 @@ int test(device *dev, oneapi::math::layout layout) { } class ImatcopyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatadd.cpp b/tests/unit_tests/blas/extensions/omatadd.cpp index bf0a9ac23..cb404cc34 100644 --- a/tests/unit_tests/blas/extensions/omatadd.cpp +++ b/tests/unit_tests/blas/extensions/omatadd.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. 
int64_t m, n; int64_t lda, ldb, ldc; @@ -84,14 +84,14 @@ int test(device *dev, oneapi::math::layout layout) { vector> A(size_a), B(size_b), C(size_c), C_ref(size_c); - rand_matrix(A.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_a, - 1, size_a); - rand_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, - 1, size_b); - rand_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_c, - 1, size_c); - copy_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_c, - 1, size_c, C_ref.data()); + rand_matrix(A.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, + size_a, 1, size_a); + rand_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, + size_b, 1, size_b); + rand_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, + size_c, 1, size_c); + copy_matrix(C.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, + size_c, 1, size_c, C_ref.data()); // Call reference OMATADD. int m_ref = (int)m; @@ -106,11 +106,11 @@ int test(device *dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); @@ -129,13 +129,13 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::omatadd(main_queue, transa, transb, m, n, alpha, - A_buffer, lda, beta, B_buffer, ldb, - C_buffer, ldc); + A_buffer, lda, beta, B_buffer, ldb, + C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::omatadd(main_queue, transa, transb, m, n, alpha, - A_buffer, lda, beta, B_buffer, ldb, C_buffer, - ldc); + A_buffer, lda, beta, B_buffer, ldb, C_buffer, + ldc); break; default: break; } @@ -155,16 +155,16 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD:\n" << error.what() << std::endl; } @@ -178,7 +178,7 @@ int test(device *dev, oneapi::math::layout layout) { } class OmataddTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatadd_usm.cpp b/tests/unit_tests/blas/extensions/omatadd_usm.cpp index cb9b45d4a..472c693c8 100644 --- 
a/tests/unit_tests/blas/extensions/omatadd_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatadd_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); @@ -133,13 +133,13 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::omatadd(main_queue, transa, transb, m, n, - alpha, &A[0], lda, beta, &B[0], ldb, - &C[0], ldc, dependencies); + alpha, &A[0], lda, beta, &B[0], + ldb, &C[0], ldc, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::omatadd(main_queue, transa, transb, m, n, - alpha, &A[0], lda, beta, &B[0], ldb, - &C[0], ldc, dependencies); + alpha, &A[0], lda, beta, &B[0], ldb, + &C[0], ldc, dependencies); break; default: break; } @@ -161,16 +161,16 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD:\n" << error.what() << std::endl; } @@ -182,7 +182,7 @@ int 
test(device *dev, oneapi::math::layout layout) { } class OmataddUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy.cpp b/tests/unit_tests/blas/extensions/omatcopy.cpp index f77de83f1..27aeb0739 100644 --- a/tests/unit_tests/blas/extensions/omatcopy.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -103,11 +103,11 @@ int test(device *dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -125,11 +125,11 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer, - lda, B_buffer, ldb); + lda, B_buffer, ldb); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::omatcopy(main_queue, trans, m, n, alpha, A_buffer, - lda, B_buffer, ldb); + lda, B_buffer, ldb); break; default: break; } @@ -147,17 +147,17 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY:\n" << e.what() << 
std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY:\n" << error.what() << std::endl; } @@ -171,7 +171,7 @@ int test(device *dev, oneapi::math::layout layout) { } class OmatcopyTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy2.cpp b/tests/unit_tests/blas/extensions/omatcopy2.cpp index 41b4b6e7c..1829f7e77 100644 --- a/tests/unit_tests/blas/extensions/omatcopy2.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy2.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -83,8 +83,8 @@ int test(device *dev, oneapi::math::layout layout) { rand_matrix(A.data(), layout, oneapi::math::transpose::nontrans, m, n, lda); rand_matrix(B.data(), layout, trans, m, n, ldb); - copy_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, size_b, - 1, size_b, B_ref.data()); + copy_matrix(B.data(), oneapi::math::layout::col_major, oneapi::math::transpose::nontrans, + size_b, 1, size_b, B_ref.data()); // Call reference OMATCOPY2. int64_t m_ref = m; @@ -100,11 +100,11 @@ int test(device *dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); @@ -121,12 +121,13 @@ int test(device *dev, oneapi::math::layout layout) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha, A_buffer, - lda, stride_a, B_buffer, ldb, stride_b); + oneapi::math::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha, + A_buffer, lda, stride_a, B_buffer, ldb, + stride_b); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha, A_buffer, - lda, stride_a, B_buffer, ldb, stride_b); + lda, stride_a, B_buffer, ldb, stride_b); break; default: break; } @@ -146,17 +147,17 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY2:\n" << error.what() << std::endl; } @@ -170,7 +171,7 @@ int test(device *dev, oneapi::math::layout layout) { } class Omatcopy2Tests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Omatcopy2Tests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp 
b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp index cdcc21020..59a8955df 100644 --- a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); @@ -129,13 +129,13 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::omatcopy2(main_queue, trans, m, n, alpha, - &A[0], lda, stride_a, &B[0], ldb, - stride_b, dependencies); + &A[0], lda, stride_a, &B[0], ldb, + stride_b, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::omatcopy2(main_queue, trans, m, n, alpha, - &A[0], lda, stride_a, &B[0], ldb, - stride_b, dependencies); + &A[0], lda, stride_a, &B[0], ldb, + stride_b, dependencies); break; default: break; } @@ -157,17 +157,17 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during 
execution of OMATCOPY2:\n" << error.what() << std::endl; } @@ -179,7 +179,7 @@ int test(device *dev, oneapi::math::layout layout) { } class Omatcopy2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Omatcopy2UsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp index 7ba026ab3..d3b6be7fe 100644 --- a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -126,8 +126,8 @@ int test(device *dev, oneapi::math::layout layout) { main_queue, trans, m, n, alpha, &A[0], lda, &B[0], ldb, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::omatcopy(main_queue, trans, m, n, alpha, &A[0], - lda, &B[0], ldb, dependencies); + done = oneapi::math::blas::row_major::omatcopy( + main_queue, trans, m, n, alpha, &A[0], lda, &B[0], ldb, dependencies); break; default: break; } @@ -147,17 +147,17 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; 
print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY:\n" << error.what() << std::endl; } @@ -169,7 +169,7 @@ int test(device *dev, oneapi::math::layout layout) { } class OmatcopyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/include/onemath_blas_helper.hpp b/tests/unit_tests/blas/include/onemath_blas_helper.hpp index f5944fe92..f258e55b9 100644 --- a/tests/unit_tests/blas/include/onemath_blas_helper.hpp +++ b/tests/unit_tests/blas/include/onemath_blas_helper.hpp @@ -63,7 +63,7 @@ inline CBLAS_OFFSET convert_to_cblas_offset(oneapi::math::offset offsetc) { inline CBLAS_LAYOUT convert_to_cblas_layout(oneapi::math::layout is_column) { return is_column == oneapi::math::layout::col_major ? 
CBLAS_LAYOUT::CblasColMajor - : CBLAS_LAYOUT::CblasRowMajor; + : CBLAS_LAYOUT::CblasRowMajor; } static const CBLAS_TRANSPOSE fcblastrans[] = { CblasNoTrans, CblasTrans, CblasConjTrans }; diff --git a/tests/unit_tests/blas/include/reference_blas_templates.hpp b/tests/unit_tests/blas/include/reference_blas_templates.hpp index b84e71c1b..45263cc63 100644 --- a/tests/unit_tests/blas/include/reference_blas_templates.hpp +++ b/tests/unit_tests/blas/include/reference_blas_templates.hpp @@ -33,8 +33,8 @@ inline bool isNonTranspose(CBLAS_TRANSPOSE trans) { } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, - int col, int ld, T_dest *&dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, + int col, int ld, T_dest*& dest) { int i, j, Iend, Jend; if (layout == CblasColMajor) { Jend = isNonTranspose(trans) ? col : row; @@ -53,8 +53,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE tra } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, - int col, int ld, T_dest off, T_dest *&dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, + int col, int ld, T_dest off, T_dest*& dest) { int i, j, Iend, Jend; if (layout == CblasColMajor) { Jend = isNonTranspose(trans) ? 
col : row; @@ -73,8 +73,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE tra } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, int row, int col, int ld, - CBLAS_OFFSET off_kind, T_off off, T_dest &dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, int row, int col, int ld, + CBLAS_OFFSET off_kind, T_off off, T_dest& dest) { using T_data = typename std::remove_reference::type; int i, j; T_data tmp; @@ -110,8 +110,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, int row, int col, i } template -static inline void update_c(T_src &src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, int row, - int col, int ld, T_desc *&dest) { +static inline void update_c(T_src& src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, int row, + int col, int ld, T_desc*& dest) { int i, j; int Jend = (layout == CblasColMajor) ? col : row; @@ -139,15 +139,15 @@ static inline void update_c(T_src &src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lo /* Level 3 */ template -static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const fp *alpha, const fp *a, const int *lda, - const fp *b, const int *ldb, const fp *beta, fp *c, const int *ldc); +static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const fp* alpha, const fp* a, const int* lda, + const fp* b, const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const sycl::half *alpha, const sycl::half *a, const int *lda, - const sycl::half *b, const int *ldb, const sycl::half *beta, sycl::half *c, - const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const sycl::half* alpha, const sycl::half* a, 
const int* lda, + const sycl::half* b, const int* ldb, const sycl::half* beta, sycl::half* c, + const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb, sizec; const float alphaf = *alpha; @@ -162,9 +162,9 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n; sizec = *ldc * *m; } - float *af = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); - float *cf = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizec); + float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); + float* cf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizec); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); copy_mat(c, layout, CblasNoTrans, *m, *n, *ldc, cf); @@ -177,49 +177,49 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const float *a, const int *lda, - const float *b, const int *ldb, const float *beta, float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const float* a, const int* lda, + const float* b, const int* ldb, const float* beta, float* c, const int* ldc) { cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const double *alpha, const double *a, const int *lda, - const double *b, const int *ldb, const double *beta, 
double *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, double* c, const int* ldc) { cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, const int *ldc) { - cblas_cgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, const int* ldc) { + cblas_cgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void*)alpha, (const void*)a, + *lda, (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { - cblas_zgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const 
std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { + cblas_zgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void*)alpha, (const void*)a, + *lda, (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const fpc *alpha, const fpa *a, const int *lda, - const fpa *b, const int *ldb, const fpc *beta, fpc *c, const int *ldc); +static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const fpc* alpha, const fpa* a, const int* lda, + const fpa* b, const int* ldb, const fpc* beta, fpc* c, const int* ldc); template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const sycl::half *a, const int *lda, - const sycl::half *b, const int *ldb, const float *beta, float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const sycl::half* a, const int* lda, + const sycl::half* b, const int* ldb, const float* beta, float* c, const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb; if (layout == CblasColMajor) { @@ -230,8 +230,8 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k; sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; } - float *af = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); + float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c, @@ -241,10 +241,10 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const oneapi::math::bfloat16 *a, - const int *lda, const oneapi::math::bfloat16 *b, const int *ldb, const float *beta, - float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const oneapi::math::bfloat16* a, + const int* lda, const oneapi::math::bfloat16* b, const int* ldb, const float* beta, + float* c, const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb; if (layout == CblasColMajor) { @@ -255,8 +255,8 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k; sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; } - float *af = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); + float* af = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizeb); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c, @@ -266,1146 +266,1142 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template -static void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, - const int *n, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, + const int* n, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc) { +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const float* alpha, const float* a, const int* lda, const float* b, const int* ldb, + const float* beta, float* c, const int* ldc) { cblas_ssymm_wrapper(layout, left_right, uplo, *m, *n, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc) { +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO 
uplo, const int* m, const int* n, + const double* alpha, const double* a, const int* lda, const double* b, const int* ldb, + const double* beta, double* c, const int* ldc) { cblas_dsymm_wrapper(layout, left_right, uplo, *m, *n, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_csymm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_csymm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zsymm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zsymm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const 
void*)beta, (void*)c, *ldc); } template -static void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp *alpha, const fp *a, const int *lda, const fp *beta, fp *c, - const int *ldc); +static void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp* alpha, const fp* a, const int* lda, const fp* beta, fp* c, + const int* ldc); template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *beta, float *c, - const int *ldc) { +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const float* beta, float* c, + const int* ldc) { cblas_ssyrk_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, *beta, c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *beta, double *c, - const int *ldc) { +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* beta, double* c, + const int* ldc) { cblas_dsyrk_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, *beta, c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *beta, std::complex *c, const int *ldc) { - cblas_csyrk_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)beta, (void *)c, *ldc); +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + 
const std::complex* beta, std::complex* c, const int* ldc) { + cblas_csyrk_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)beta, (void*)c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *beta, std::complex *c, const int *ldc) { - cblas_zsyrk_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)beta, (void *)c, *ldc); +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* beta, std::complex* c, const int* ldc) { + cblas_zsyrk_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)beta, (void*)c, *ldc); } template -static void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, - const int *n, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, + const int* n, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_chemm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, 
const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_chemm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zhemm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zhemm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp_scalar *alpha, const fp_data *a, const int *lda, - const fp_scalar *beta, fp_data *c, const int *ldc); +static void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp_scalar* alpha, const fp_data* a, const int* lda, + const fp_scalar* beta, fp_data* c, const int* ldc); template <> -void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const std::complex *a, const int *lda, const float *beta, - std::complex *c, const int *ldc) { - cblas_cherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void *)a, *lda, *beta, - (void *)c, *ldc); +void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, 
CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const std::complex* a, const int* lda, const float* beta, + std::complex* c, const int* ldc) { + cblas_cherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void*)a, *lda, *beta, (void*)c, + *ldc); } template <> -void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const std::complex *a, const int *lda, const double *beta, - std::complex *c, const int *ldc) { - cblas_zherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void *)a, *lda, *beta, - (void *)c, *ldc); +void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const std::complex* a, const int* lda, const double* beta, + std::complex* c, const int* ldc) { + cblas_zherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void*)a, *lda, *beta, (void*)c, + *ldc); } template -static void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc) { +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const float* b, const int* ldb, + const float* beta, float* c, const int* ldc) { cblas_ssyr2k_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> 
-void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc) { +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* b, const int* ldb, + const double* beta, double* c, const int* ldc) { cblas_dsyr2k_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_csyr2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_csyr2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zsyr2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + 
const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zsyr2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp_data *alpha, const fp_data *a, const int *lda, - const fp_data *b, const int *ldb, const fp_scalar *beta, fp_data *c, - const int *ldc); +static void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp_data* alpha, const fp_data* a, const int* lda, + const fp_data* b, const int* ldb, const fp_scalar* beta, fp_data* c, + const int* ldc); template <> -void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const float *beta, std::complex *c, - const int *ldc) { - cblas_cher2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, *beta, (void *)c, *ldc); +void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const float* beta, std::complex* c, + const int* ldc) { + cblas_cher2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, *beta, (void*)c, *ldc); } template <> -void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const double *beta, - std::complex *c, const int *ldc) { - cblas_zher2k_wrapper(layout, uplo, 
trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, *beta, (void *)c, *ldc); +void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const double* beta, + std::complex* c, const int* ldc) { + cblas_zher2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, *beta, (void*)c, *ldc); } template static void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const fp *alpha, const fp *a, - const int *lda, fp *b, const int *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const fp* alpha, const fp* a, + const int* lda, fp* b, const int* ldb); template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const float *alpha, const float *a, - const int *lda, float *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const float* alpha, const float* a, + const int* lda, float* b, const int* ldb) { cblas_strmm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const double *alpha, const double *a, - const int *lda, double *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const double* alpha, const double* a, + const int* lda, double* b, const int* ldb) { cblas_dtrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex 
*b, const int *ldb) { - cblas_ctrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ctrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ztrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ztrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template static void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const fp *alpha, const fp *a, - const int *lda, fp *b, const int *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const fp* alpha, const fp* a, + const int* lda, fp* b, const int* ldb); template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const float *alpha, const float *a, - const int *lda, float *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const float* alpha, const float* a, + const int* lda, float* b, const int* ldb) { cblas_strsm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, 
const int *m, const int *n, const double *alpha, const double *a, - const int *lda, double *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const double* alpha, const double* a, + const int* lda, double* b, const int* ldb) { cblas_dtrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ctrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ctrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ztrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ztrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } /* Level 2 */ template -static void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const fp* alpha, const 
fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const float* alpha, const float* a, const int* lda, const float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_sgemv_wrapper(layout, trans, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dgemv_wrapper(layout, trans, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_cgemv_wrapper(layout, trans, *m, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_cgemv_wrapper(layout, trans, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const 
void*)beta, (void*)y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zgemv_wrapper(layout, trans, *m, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zgemv_wrapper(layout, trans, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, - int *ku, const fp *alpha, const fp *a, const int *lda, const fp *x, - const int *incx, const fp *beta, fp *y, const int *incy); +static void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, + int* ku, const fp* alpha, const fp* a, const int* lda, const fp* x, + const int* incx, const fp* beta, fp* y, const int* incy); template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const float* alpha, const float* a, const int* lda, const float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_sgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, 
const int *n, int *kl, int *ku, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_cgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_cgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const std::complex* alpha, const 
std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const float *alpha, const float *x, - const int *incx, const float *y, const int *incy, float *a, const int *lda) { +void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const float* alpha, const float* x, + const int* incx, const float* y, const int* incy, float* a, const int* lda) { cblas_sger_wrapper(layout, *m, *n, *alpha, x, *incx, y, *incy, a, *lda); } template <> -void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const double *alpha, const double *x, - const int *incx, const double *y, const int *incy, double *a, const int *lda) { +void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const double* alpha, const double* x, + const int* incx, const double* y, const int* incy, double* a, const int* lda) { cblas_dger_wrapper(layout, *m, *n, *alpha, x, *incx, y, *incy, a, *lda); } template -static void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex 
*x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_cgerc_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_cgerc_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template <> -void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_zgerc_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_zgerc_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template -static void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_cgeru_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const 
std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_cgeru_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template <> -void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_zgeru_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_zgeru_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template -static void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const fp* alpha, const fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chbmv_wrapper(layout, upper_lower, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, 
const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chbmv_wrapper(layout, upper_lower, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhbmv_wrapper(layout, upper_lower, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhbmv_wrapper(layout, upper_lower, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const int *lda, const fp *x, const int *incx, const fp *beta, fp *y, - const int *incy); +static void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const int* lda, const fp* x, const int* incx, const fp* beta, fp* y, + const int* incy); template <> -void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chemv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const 
std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chemv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhemv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhemv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp_scalar *alpha, - const fp_data *x, const int *incx, fp_data *a, const int *lda); +static void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp_scalar* alpha, + const fp_data* x, const int* incx, fp_data* a, const int* lda); template <> -void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const std::complex *x, const int *incx, std::complex *a, const int *lda) { - cblas_cher_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a, *lda); +void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const std::complex* x, const int* incx, std::complex* a, const int* lda) { + cblas_cher_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a, *lda); } template <> -void 
her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const std::complex *x, const int *incx, std::complex *a, const int *lda) { - cblas_zher_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a, *lda); +void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const std::complex* x, const int* incx, std::complex* a, const int* lda) { + cblas_zher_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a, *lda); } template -static void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a, const int *lda) { - cblas_cher2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a, const int* lda) { + cblas_cher2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a, *lda); } template <> -void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a, const int *lda) { - cblas_zher2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void her2(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a, const int* lda) { + cblas_zher2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a, *lda); } template -static void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const fp *x, const int *incx, const fp *beta, fp *y, const int *incy); +static void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const fp* x, const int* incx, const fp* beta, fp* y, const int* incy); template <> -void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chpmv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chpmv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, (const void*)x, + *incx, (const void*)beta, (void*)y, *incy); } template <> -void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhpmv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, + const std::complex* x, const int* 
incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhpmv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, (const void*)x, + *incx, (const void*)beta, (void*)y, *incy); } template -static void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp_scalar *alpha, - const fp_data *x, const int *incx, fp_data *a); +static void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp_scalar* alpha, + const fp_data* x, const int* incx, fp_data* a); template <> -void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const std::complex *x, const int *incx, std::complex *a) { - cblas_chpr_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a); +void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const std::complex* x, const int* incx, std::complex* a) { + cblas_chpr_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a); } template <> -void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const std::complex *x, const int *incx, std::complex *a) { - cblas_zhpr_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a); +void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const std::complex* x, const int* incx, std::complex* a) { + cblas_zhpr_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a); } template -static void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a); +static void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a); template <> -void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, 
- const std::complex *y, const int *incy, std::complex *a) { - cblas_chpr2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a); +void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a) { + cblas_chpr2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a); } template <> -void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a) { - cblas_zhpr2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a); +void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a) { + cblas_zhpr2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a); } template -static void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const fp* alpha, const fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const 
float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_ssbmv_wrapper(layout, upper_lower, *n, *k, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dsbmv_wrapper(layout, upper_lower, *n, *k, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template -static void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const int *lda, const fp *x, const int *incx, const fp *beta, fp *y, - const int *incy); +static void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const int* lda, const fp* x, const int* incx, const fp* beta, fp* y, + const int* incy); template <> -void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *a, const int *lda, const float *x, const int *incx, const float *beta, - float *y, const int *incy) { +void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* a, const int* lda, const float* x, const int* incx, const float* beta, + float* y, const int* incy) { cblas_ssymv_wrapper(layout, upper_lower, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *a, const int *lda, const double *x, const int *incx, const double *beta, - double *y, const int *incy) { +void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* a, const 
int* lda, const double* x, const int* incx, const double* beta, + double* y, const int* incy) { cblas_dsymv_wrapper(layout, upper_lower, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template -static void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, fp *a, const int *lda); +static void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, fp* a, const int* lda); template <> -void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, float *a, const int *lda) { +void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, float* a, const int* lda) { cblas_ssyr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a, *lda); } template <> -void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, double *a, const int *lda) { +void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, double* a, const int* lda) { cblas_dsyr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a, *lda); } template -static void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, const float *y, const int *incy, float *a, - const int *lda) { +void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, const float* y, const 
int* incy, float* a, + const int* lda) { cblas_ssyr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a, *lda); } template <> -void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, const double *y, const int *incy, double *a, - const int *lda) { +void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, const double* y, const int* incy, double* a, + const int* lda) { cblas_dsyr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a, *lda); } template -static void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const fp *x, const int *incx, const fp *beta, fp *y, const int *incy); +static void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const fp* x, const int* incx, const fp* beta, fp* y, const int* incy); template <> -void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *a, const float *x, const int *incx, const float *beta, float *y, - const int *incy) { +void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* a, const float* x, const int* incx, const float* beta, float* y, + const int* incy) { cblas_sspmv_wrapper(layout, upper_lower, *n, *alpha, a, x, *incx, *beta, y, *incy); } template <> -void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *a, const double *x, const int *incx, const double *beta, double *y, - const int *incy) { +void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* a, const double* x, const int* incx, const double* beta, double* y, + const int* incy) { cblas_dspmv_wrapper(layout, upper_lower, *n, *alpha, a, x, *incx, *beta, y, *incy); } template -static void spr(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, fp *a); +static void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, fp* a); template <> -void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, float *a) { +void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, float* a) { cblas_sspr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a); } template <> -void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, double *a) { +void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, double* a) { cblas_dspr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a); } template -static void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a); +static void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a); template <> -void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, const float *y, const int *incy, float *a) { +void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, const float* y, const int* incy, float* a) { cblas_sspr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a); } template <> -void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, const double *y, const int *incy, double *a) { +void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* 
x, const int* incx, const double* y, const int* incy, double* a) { cblas_dspr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a); } template static void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const int *k, const fp *a, const int *lda, - fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const int* k, const fp* a, const int* lda, + fp* x, const int* incx); template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const int* k, const float* a, const int* lda, float* x, const int* incx) { cblas_stbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const int* k, const double* a, const int* lda, double* x, const int* incx) { cblas_dtbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ctbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ctbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - 
cblas_ztbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ztbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template static void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const int *k, const fp *a, const int *lda, - fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const int* k, const fp* a, const int* lda, + fp* x, const int* incx); template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const int* k, const float* a, const int* lda, float* x, const int* incx) { cblas_stbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const int* k, const double* a, const int* lda, double* x, const int* incx) { cblas_dtbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ctbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ctbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template <> void tbsv(CBLAS_LAYOUT 
layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ztbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ztbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template static void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, fp* x, const int* incx); template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, float *x, const int *incx) { + const int* n, const float* a, float* x, const int* incx) { cblas_stpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, double *x, const int *incx) { + const int* n, const double* a, double* x, const int* incx) { cblas_dtpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ctpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ctpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const 
std::complex *a, std::complex *x, const int *incx) { - cblas_ztpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ztpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template static void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, fp* x, const int* incx); template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, float *x, const int *incx) { + const int* n, const float* a, float* x, const int* incx) { cblas_stpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, double *x, const int *incx) { + const int* n, const double* a, double* x, const int* incx) { cblas_dtpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ctpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ctpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ztpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* 
a, std::complex* x, const int* incx) { + cblas_ztpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template static void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, const int *lda, fp *x, - const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, const int* lda, fp* x, + const int* incx); template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const float* a, const int* lda, float* x, const int* incx) { cblas_strmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const double* a, const int* lda, double* x, const int* incx) { cblas_dtrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ctrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, std::complex* x, + const int* incx) { + cblas_ctrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ztrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, 
std::complex* x, + const int* incx) { + cblas_ztrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template static void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, const int *lda, fp *x, - const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, const int* lda, fp* x, + const int* incx); template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const float* a, const int* lda, float* x, const int* incx) { cblas_strsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const double* a, const int* lda, double* x, const int* incx) { cblas_dtrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ctrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, std::complex* x, + const int* incx) { + cblas_ctrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ztrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, 
std::complex* x, + const int* incx) { + cblas_ztrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } /* Level 1 */ template -static fp_res asum(const int *n, const fp_data *x, const int *incx); +static fp_res asum(const int* n, const fp_data* x, const int* incx); template <> -float asum(const int *n, const float *x, const int *incx) { +float asum(const int* n, const float* x, const int* incx) { return cblas_sasum_wrapper(*n, x, *incx); } template <> -double asum(const int *n, const double *x, const int *incx) { +double asum(const int* n, const double* x, const int* incx) { return cblas_dasum_wrapper(*n, x, *incx); } template <> -float asum(const int *n, const std::complex *x, const int *incx) { - return cblas_scasum_wrapper(*n, (const void *)x, *incx); +float asum(const int* n, const std::complex* x, const int* incx) { + return cblas_scasum_wrapper(*n, (const void*)x, *incx); } template <> -double asum(const int *n, const std::complex *x, const int *incx) { - return cblas_dzasum_wrapper(*n, (const void *)x, *incx); +double asum(const int* n, const std::complex* x, const int* incx) { + return cblas_dzasum_wrapper(*n, (const void*)x, *incx); } template -static void axpy(const int *n, const fp *alpha, const fp *x, const int *incx, fp *y, - const int *incy); +static void axpy(const int* n, const fp* alpha, const fp* x, const int* incx, fp* y, + const int* incy); template <> -void axpy(const int *n, const float *alpha, const float *x, const int *incx, float *y, - const int *incy) { +void axpy(const int* n, const float* alpha, const float* x, const int* incx, float* y, + const int* incy) { cblas_saxpy_wrapper(*n, *alpha, x, *incx, y, *incy); } template <> -void axpy(const int *n, const double *alpha, const double *x, const int *incx, double *y, - const int *incy) { +void axpy(const int* n, const double* alpha, const double* x, const int* incx, double* y, + const int* incy) { cblas_daxpy_wrapper(*n, *alpha, x, *incx, y, *incy); 
} template <> -void axpy(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, std::complex *y, const int *incy) { - cblas_caxpy_wrapper(*n, (const void *)alpha, (const void *)x, *incx, (void *)y, *incy); +void axpy(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, std::complex* y, const int* incy) { + cblas_caxpy_wrapper(*n, (const void*)alpha, (const void*)x, *incx, (void*)y, *incy); } template <> -void axpy(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, std::complex *y, const int *incy) { - cblas_zaxpy_wrapper(*n, (const void *)alpha, (const void *)x, *incx, (void *)y, *incy); +void axpy(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, std::complex* y, const int* incy) { + cblas_zaxpy_wrapper(*n, (const void*)alpha, (const void*)x, *incx, (void*)y, *incy); } template -static void copy(const int *n, const fp *x, const int *incx, fp *y, const int *incy); +static void copy(const int* n, const fp* x, const int* incx, fp* y, const int* incy); template <> -void copy(const int *n, const float *x, const int *incx, float *y, const int *incy) { +void copy(const int* n, const float* x, const int* incx, float* y, const int* incy) { cblas_scopy_wrapper(*n, x, *incx, y, *incy); } template <> -void copy(const int *n, const double *x, const int *incx, double *y, const int *incy) { +void copy(const int* n, const double* x, const int* incx, double* y, const int* incy) { cblas_dcopy_wrapper(*n, x, *incx, y, *incy); } template <> -void copy(const int *n, const std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_ccopy_wrapper(*n, (const void *)x, *incx, (void *)y, *incy); +void copy(const int* n, const std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_ccopy_wrapper(*n, (const void*)x, *incx, (void*)y, *incy); } template <> -void copy(const int *n, const std::complex *x, const int *incx, 
std::complex *y, - const int *incy) { - cblas_zcopy_wrapper(*n, (const void *)x, *incx, (void *)y, *incy); +void copy(const int* n, const std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_zcopy_wrapper(*n, (const void*)x, *incx, (void*)y, *incy); } template -static fp_res dot(const int *n, const fp *x, const int *incx, const fp *y, const int *incy); +static fp_res dot(const int* n, const fp* x, const int* incx, const fp* y, const int* incy); template <> -float dot(const int *n, const float *x, const int *incx, const float *y, const int *incy) { +float dot(const int* n, const float* x, const int* incx, const float* y, const int* incy) { return cblas_sdot_wrapper(*n, x, *incx, y, *incy); } template <> -double dot(const int *n, const double *x, const int *incx, const double *y, const int *incy) { +double dot(const int* n, const double* x, const int* incx, const double* y, const int* incy) { return cblas_ddot_wrapper(*n, x, *incx, y, *incy); } template <> -double dot(const int *n, const float *x, const int *incx, const float *y, const int *incy) { +double dot(const int* n, const float* x, const int* incx, const float* y, const int* incy) { return cblas_dsdot_wrapper(*n, x, *incx, y, *incy); } -static float sdsdot(const int *n, const float *sb, const float *x, const int *incx, const float *y, - const int *incy) { +static float sdsdot(const int* n, const float* sb, const float* x, const int* incx, const float* y, + const int* incy) { return cblas_sdsdot_wrapper(*n, *sb, x, *incx, y, *incy); } template -static fp_res nrm2(const int *n, const fp *x, const int *incx); +static fp_res nrm2(const int* n, const fp* x, const int* incx); template <> -float nrm2(const int *n, const float *x, const int *incx) { +float nrm2(const int* n, const float* x, const int* incx) { return cblas_snrm2_wrapper(*n, x, *incx); } template <> -double nrm2(const int *n, const double *x, const int *incx) { +double nrm2(const int* n, const double* x, const int* incx) { 
return cblas_dnrm2_wrapper(*n, x, *incx); } template <> -float nrm2(const int *n, const std::complex *x, const int *incx) { - return cblas_scnrm2_wrapper(*n, (const void *)x, *incx); +float nrm2(const int* n, const std::complex* x, const int* incx) { + return cblas_scnrm2_wrapper(*n, (const void*)x, *incx); } template <> -double nrm2(const int *n, const std::complex *x, const int *incx) { - return cblas_dznrm2_wrapper(*n, (const void *)x, *incx); +double nrm2(const int* n, const std::complex* x, const int* incx) { + return cblas_dznrm2_wrapper(*n, (const void*)x, *incx); } template -static void rot(const int *n, fp *x, const int *incx, fp *y, const int *incy, const fp_scalar *c, - const fp_scalar *s); +static void rot(const int* n, fp* x, const int* incx, fp* y, const int* incy, const fp_scalar* c, + const fp_scalar* s); template <> -void rot(const int *n, float *x, const int *incx, float *y, const int *incy, const float *c, - const float *s) { +void rot(const int* n, float* x, const int* incx, float* y, const int* incy, const float* c, + const float* s) { cblas_srot_wrapper(*n, x, *incx, y, *incy, *c, *s); } template <> -void rot(const int *n, double *x, const int *incx, double *y, const int *incy, const double *c, - const double *s) { +void rot(const int* n, double* x, const int* incx, double* y, const int* incy, const double* c, + const double* s) { cblas_drot_wrapper(*n, x, *incx, y, *incy, *c, *s); } template <> -void rot(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy, const float *c, const float *s) { - csrot_wrapper(n, (void *)x, incx, (void *)y, incy, c, s); +void rot(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy, const float* c, const float* s) { + csrot_wrapper(n, (void*)x, incx, (void*)y, incy, c, s); } template <> -void rot(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy, const double *c, const double *s) { - zdrot_wrapper(n, (void *)x, incx, 
(void *)y, incy, c, s); +void rot(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy, const double* c, const double* s) { + zdrot_wrapper(n, (void*)x, incx, (void*)y, incy, c, s); } template -static void rotg(fp *a, fp *b, fp_c *c, fp *s); +static void rotg(fp* a, fp* b, fp_c* c, fp* s); template <> -void rotg(float *a, float *b, float *c, float *s) { +void rotg(float* a, float* b, float* c, float* s) { cblas_srotg_wrapper(a, b, c, s); } template <> -void rotg(double *a, double *b, double *c, double *s) { +void rotg(double* a, double* b, double* c, double* s) { cblas_drotg_wrapper(a, b, c, s); } template <> -void rotg(std::complex *a, std::complex *b, float *c, std::complex *s) { - crotg_wrapper((void *)a, (void *)b, c, (void *)s); +void rotg(std::complex* a, std::complex* b, float* c, std::complex* s) { + crotg_wrapper((void*)a, (void*)b, c, (void*)s); } template <> -void rotg(std::complex *a, std::complex *b, double *c, std::complex *s) { - zrotg_wrapper((void *)a, (void *)b, c, (void *)s); +void rotg(std::complex* a, std::complex* b, double* c, std::complex* s) { + zrotg_wrapper((void*)a, (void*)b, c, (void*)s); } template -static void rotm(const int *n, fp *x, const int *incx, fp *y, const int *incy, const fp *param); +static void rotm(const int* n, fp* x, const int* incx, fp* y, const int* incy, const fp* param); template <> -void rotm(const int *n, float *x, const int *incx, float *y, const int *incy, const float *param) { +void rotm(const int* n, float* x, const int* incx, float* y, const int* incy, const float* param) { cblas_srotm_wrapper(*n, x, *incx, y, *incy, param); } template <> -void rotm(const int *n, double *x, const int *incx, double *y, const int *incy, - const double *param) { +void rotm(const int* n, double* x, const int* incx, double* y, const int* incy, + const double* param) { cblas_drotm_wrapper(*n, x, *incx, y, *incy, param); } template -static void rotmg(fp *d1, fp *d2, fp *x1, fp *y1, fp *param); +static 
void rotmg(fp* d1, fp* d2, fp* x1, fp* y1, fp* param); template <> -void rotmg(float *d1, float *d2, float *x1, float *y1, float *param) { +void rotmg(float* d1, float* d2, float* x1, float* y1, float* param) { cblas_srotmg_wrapper(d1, d2, x1, *y1, param); } template <> -void rotmg(double *d1, double *d2, double *x1, double *y1, double *param) { +void rotmg(double* d1, double* d2, double* x1, double* y1, double* param) { cblas_drotmg_wrapper(d1, d2, x1, *y1, param); } template -static void scal(const int *n, const fp_scalar *alpha, fp_data *x, const int *incx); +static void scal(const int* n, const fp_scalar* alpha, fp_data* x, const int* incx); template <> -void scal(const int *n, const float *alpha, float *x, const int *incx) { +void scal(const int* n, const float* alpha, float* x, const int* incx) { cblas_sscal_wrapper(*n, *alpha, x, *incx); } template <> -void scal(const int *n, const double *alpha, double *x, const int *incx) { +void scal(const int* n, const double* alpha, double* x, const int* incx) { cblas_dscal_wrapper(*n, *alpha, x, *incx); } template <> -void scal(const int *n, const std::complex *alpha, std::complex *x, const int *incx) { - cblas_cscal_wrapper(*n, (const void *)alpha, (void *)x, *incx); +void scal(const int* n, const std::complex* alpha, std::complex* x, const int* incx) { + cblas_cscal_wrapper(*n, (const void*)alpha, (void*)x, *incx); } template <> -void scal(const int *n, const std::complex *alpha, std::complex *x, - const int *incx) { - cblas_zscal_wrapper(*n, (const void *)alpha, (void *)x, *incx); +void scal(const int* n, const std::complex* alpha, std::complex* x, + const int* incx) { + cblas_zscal_wrapper(*n, (const void*)alpha, (void*)x, *incx); } template <> -void scal(const int *n, const float *alpha, std::complex *x, const int *incx) { - cblas_csscal_wrapper(*n, *alpha, (void *)x, *incx); +void scal(const int* n, const float* alpha, std::complex* x, const int* incx) { + cblas_csscal_wrapper(*n, *alpha, (void*)x, *incx); } 
template <> -void scal(const int *n, const double *alpha, std::complex *x, const int *incx) { - cblas_zdscal_wrapper(*n, *alpha, (void *)x, *incx); +void scal(const int* n, const double* alpha, std::complex* x, const int* incx) { + cblas_zdscal_wrapper(*n, *alpha, (void*)x, *incx); } template -static void swap(const int *n, fp *x, const int *incx, fp *y, const int *incy); +static void swap(const int* n, fp* x, const int* incx, fp* y, const int* incy); template <> -void swap(const int *n, float *x, const int *incx, float *y, const int *incy) { +void swap(const int* n, float* x, const int* incx, float* y, const int* incy) { cblas_sswap_wrapper(*n, x, *incx, y, *incy); } template <> -void swap(const int *n, double *x, const int *incx, double *y, const int *incy) { +void swap(const int* n, double* x, const int* incx, double* y, const int* incy) { cblas_dswap_wrapper(*n, x, *incx, y, *incy); } template <> -void swap(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_cswap_wrapper(*n, (void *)x, *incx, (void *)y, *incy); +void swap(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_cswap_wrapper(*n, (void*)x, *incx, (void*)y, *incy); } template <> -void swap(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_zswap_wrapper(*n, (void *)x, *incx, (void *)y, *incy); +void swap(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_zswap_wrapper(*n, (void*)x, *incx, (void*)y, *incy); } template -static void dotc(fp *pres, const int *n, const fp *x, const int *incx, const fp *y, - const int *incy); +static void dotc(fp* pres, const int* n, const fp* x, const int* incx, const fp* y, + const int* incy); template <> -void dotc(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_cdotc_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, 
*incy, (void *)pres); +void dotc(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_cdotc_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template <> -void dotc(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_zdotc_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotc(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_zdotc_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template -static void dotu(fp *pres, const int *n, const fp *x, const int *incx, const fp *y, - const int *incy); +static void dotu(fp* pres, const int* n, const fp* x, const int* incx, const fp* y, + const int* incy); template <> -void dotu(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_cdotu_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotu(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_cdotu_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template <> -void dotu(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_zdotu_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotu(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_zdotu_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template -static int iamax(const int *n, const fp *x, const int *incx); +static int iamax(const int* n, const fp* x, const int* incx); template <> -int iamax(const int *n, 
const float *x, const int *incx) { +int iamax(const int* n, const float* x, const int* incx) { return cblas_isamax_wrapper(*n, x, *incx); } template <> -int iamax(const int *n, const double *x, const int *incx) { +int iamax(const int* n, const double* x, const int* incx) { return cblas_idamax_wrapper(*n, x, *incx); } template <> -int iamax(const int *n, const std::complex *x, const int *incx) { - return cblas_icamax_wrapper(*n, (const void *)x, *incx); +int iamax(const int* n, const std::complex* x, const int* incx) { + return cblas_icamax_wrapper(*n, (const void*)x, *incx); } template <> -int iamax(const int *n, const std::complex *x, const int *incx) { - return cblas_izamax_wrapper(*n, (const void *)x, *incx); +int iamax(const int* n, const std::complex* x, const int* incx) { + return cblas_izamax_wrapper(*n, (const void*)x, *incx); } inline float abs_val(float val) { @@ -1425,10 +1421,10 @@ inline double abs_val(std::complex val) { } template -static int iamin(const int *n, const fp *x, const int *incx); +static int iamin(const int* n, const fp* x, const int* incx); template <> -int iamin(const int *n, const float *x, const int *incx) { +int iamin(const int* n, const float* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1451,7 +1447,7 @@ int iamin(const int *n, const float *x, const int *incx) { } template <> -int iamin(const int *n, const double *x, const int *incx) { +int iamin(const int* n, const double* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1474,7 +1470,7 @@ int iamin(const int *n, const double *x, const int *incx) { } template <> -int iamin(const int *n, const std::complex *x, const int *incx) { +int iamin(const int* n, const std::complex* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1497,7 +1493,7 @@ int iamin(const int *n, const std::complex *x, const int *incx) { } template <> -int iamin(const int *n, const std::complex *x, const int *incx) { +int iamin(const int* n, const std::complex* x, 
const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1522,12 +1518,12 @@ int iamin(const int *n, const std::complex *x, const int *incx) { /* Extensions */ template -static void axpby(const int *n, const fp *alpha, const fp *x, const int *incx, const fp *beta, - fp *y, const int *incy); +static void axpby(const int* n, const fp* alpha, const fp* x, const int* incx, const fp* beta, + fp* y, const int* incy); template <> -void axpby(const int *n, const float *alpha, const float *x, const int *incx, const float *beta, - float *y, const int *incy) { +void axpby(const int* n, const float* alpha, const float* x, const int* incx, const float* beta, + float* y, const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1536,8 +1532,8 @@ void axpby(const int *n, const float *alpha, const float *x, const int *incx, co } template <> -void axpby(const int *n, const double *alpha, const double *x, const int *incx, const double *beta, - double *y, const int *incy) { +void axpby(const int* n, const double* alpha, const double* x, const int* incx, const double* beta, + double* y, const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1546,9 +1542,9 @@ void axpby(const int *n, const double *alpha, const double *x, const int *incx, } template <> -void axpby(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, const std::complex *beta, std::complex *y, - const int *incy) { +void axpby(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, const std::complex* beta, std::complex* y, + const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 
0 : (1 - *n) * (*incy); @@ -1557,9 +1553,9 @@ void axpby(const int *n, const std::complex *alpha, const std::complex -void axpby(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, const std::complex *beta, std::complex *y, - const int *incy) { +void axpby(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, const std::complex* beta, std::complex* y, + const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1569,16 +1565,16 @@ void axpby(const int *n, const std::complex *alpha, const std::complex static void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, - const fps *alpha, const fpa *a, const int *lda, const fpa *ao, const fpb *b, - const int *ldb, const fpb *bo, const fps *beta, fpc *c, const int *ldc, - const fpc *co); + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, + const fps* alpha, const fpa* a, const int* lda, const fpa* ao, const fpb* b, + const int* ldb, const fpb* bo, const fps* beta, fpc* c, const int* ldc, + const fpc* co); template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const int8_t *a, const int *lda, const int8_t *ao, const int8_t *b, const int *ldb, - const int8_t *bo, const float *beta, int32_t *c, const int *ldc, const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const int8_t* a, const int* lda, const int8_t* ao, const int8_t* b, const int* ldb, + const int8_t* bo, const float* beta, int32_t* c, const int* ldc, const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. 
int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1591,9 +1587,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1611,10 +1607,10 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const int8_t *a, const int *lda, const int8_t *ao, const uint8_t *b, const int *ldb, - const uint8_t *bo, const float *beta, int32_t *c, const int *ldc, - const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const int8_t* a, const int* lda, const int8_t* ao, const uint8_t* b, const int* ldb, + const uint8_t* bo, const float* beta, int32_t* c, const int* ldc, + const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1627,9 +1623,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1647,9 +1643,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const uint8_t *a, const int *lda, const uint8_t *ao, const int8_t *b, const int *ldb, - const int8_t *bo, const float *beta, int32_t *c, const int *ldc, const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const uint8_t* a, const int* lda, const uint8_t* ao, const int8_t* b, const int* ldb, + const int8_t* bo, const float* beta, int32_t* c, const int* ldc, const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1662,9 +1658,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1682,10 +1678,10 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const uint8_t *a, const int *lda, const uint8_t *ao, const uint8_t *b, - const int *ldb, const uint8_t *bo, const float *beta, int32_t *c, const int *ldc, - const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const uint8_t* a, const int* lda, const uint8_t* ao, const uint8_t* b, + const int* ldb, const uint8_t* bo, const float* beta, int32_t* c, const int* ldc, + const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1698,9 +1694,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1718,19 +1714,19 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template static void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const fp *alpha, const fp *a, - const int *lda, const fp *b, const int *ldb, const fp *beta, fp *c, - const int *ldc); + CBLAS_TRANSPOSE transb, const int* n, const int* k, const fp* alpha, const fp* a, + const int* lda, const fp* b, const int* ldb, const fp* beta, fp* c, + const int* ldc); template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const float *alpha, const float *a, - const int *lda, const float *b, const int *ldb, const float *beta, float *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const float* alpha, const float* a, + const int* lda, const float* b, const int* ldb, const float* beta, float* c, + const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. 
int sizec; sizec = *ldc * *n; - float *cf = (float *)oneapi::math::aligned_alloc(64, sizeof(float) * sizec); + float* cf = (float*)oneapi::math::aligned_alloc(64, sizeof(float) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_sgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf, *ldc); @@ -1740,13 +1736,13 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const double *alpha, const double *a, - const int *lda, const double *b, const int *ldb, const double *beta, double *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const double* alpha, const double* a, + const int* lda, const double* b, const int* ldb, const double* beta, double* c, + const int* ldc) { // Not supported in NETLIB. DGEMM is used as reference. int sizec; sizec = *ldc * *n; - double *cf = (double *)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); + double* cf = (double*)oneapi::math::aligned_alloc(64, sizeof(double) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_dgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf, *ldc); @@ -1756,15 +1752,15 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { // Not supported in NETLIB. 
CGEMM is used as reference. int sizec; sizec = *ldc * *n; - std::complex *cf = - (std::complex *)oneapi::math::aligned_alloc(64, sizeof(std::complex) * sizec); + std::complex* cf = + (std::complex*)oneapi::math::aligned_alloc(64, sizeof(std::complex) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_cgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf, *ldc); @@ -1774,14 +1770,14 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { // Not supported in NETLIB. ZGEMM is used as reference. 
int sizec; sizec = *ldc * *n; - std::complex *cf = (std::complex *)oneapi::math::aligned_alloc( + std::complex* cf = (std::complex*)oneapi::math::aligned_alloc( 64, sizeof(std::complex) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_zgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf, @@ -1791,12 +1787,12 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, } template -static void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const fp *a, const int *lda, const fp *x, const int *incx, fp *c, const int *ldc); +static void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const fp* a, const int* lda, const fp* x, const int* incx, fp* c, const int* ldc); template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, const float *a, - const int *lda, const float *x, const int *incx, float *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, const float* a, + const int* lda, const float* x, const int* incx, float* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. float tmp; int size_x = (left_right == CblasLeft) ? *m : *n; @@ -1827,8 +1823,8 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, const double *a, - const int *lda, const double *x, const int *incx, double *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, const double* a, + const int* lda, const double* x, const int* incx, double* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. double tmp; int size_x = (left_right == CblasLeft) ? 
*m : *n; @@ -1859,9 +1855,9 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const std::complex *a, const int *lda, const std::complex *x, - const int *incx, std::complex *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const std::complex* a, const int* lda, const std::complex* x, + const int* incx, std::complex* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. std::complex tmp; int size_x = (left_right == CblasLeft) ? *m : *n; @@ -1912,9 +1908,9 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const std::complex *a, const int *lda, const std::complex *x, - const int *incx, std::complex *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const std::complex* a, const int* lda, const std::complex* x, + const int* incx, std::complex* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. std::complex tmp; int size_x = (left_right == CblasLeft) ? 
*m : *n; @@ -1979,7 +1975,7 @@ fp sametype_conj(fp x) { template void omatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, int64_t m, int64_t n, - fp alpha, fp *A, int64_t lda, fp *B, int64_t ldb) { + fp alpha, fp* A, int64_t lda, fp* B, int64_t ldb) { int64_t logical_m, logical_n; if (layout == oneapi::math::layout::col_major) { logical_m = m; @@ -2014,9 +2010,9 @@ void omatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, in } template -void omatcopy2_ref(oneapi::math::layout layout, oneapi::math::transpose trans, const int64_t &m, - const int64_t &n, const fp &alpha, const fp *in_matrix, const int64_t &ld_in, - const int64_t &inc_in, fp *out_matrix, const int64_t &ld_out, +void omatcopy2_ref(oneapi::math::layout layout, oneapi::math::transpose trans, const int64_t& m, + const int64_t& n, const fp& alpha, const fp* in_matrix, const int64_t& ld_in, + const int64_t& inc_in, fp* out_matrix, const int64_t& ld_out, const int64_t inc_out) { int64_t logical_m, logical_n; if (layout == oneapi::math::layout::col_major) { @@ -2061,7 +2057,7 @@ void omatcopy2_ref(oneapi::math::layout layout, oneapi::math::transpose trans, c template void imatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, int64_t m, int64_t n, - fp alpha, fp *A, int64_t lda, int64_t ldb) { + fp alpha, fp* A, int64_t lda, int64_t ldb) { int64_t logical_m, logical_n; if (layout == oneapi::math::layout::col_major) { logical_m = m; @@ -2115,8 +2111,8 @@ void imatcopy_ref(oneapi::math::layout layout, oneapi::math::transpose trans, in template void omatadd_ref(oneapi::math::layout layout, oneapi::math::transpose transa, - oneapi::math::transpose transb, int64_t m, int64_t n, fp alpha, fp *A, int64_t lda, - fp beta, fp *B, int64_t ldb, fp *C, int64_t ldc) { + oneapi::math::transpose transb, int64_t m, int64_t n, fp alpha, fp* A, int64_t lda, + fp beta, fp* B, int64_t ldb, fp* C, int64_t ldc) { int64_t logical_m, logical_n; if (layout == 
oneapi::math::layout::col_major) { logical_m = m; diff --git a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp index 0ba5506c0..d00d20947 100644 --- a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp +++ b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp @@ -27,7 +27,7 @@ #ifdef __linux__ #include -#define LIB_TYPE void * +#define LIB_TYPE void* #define GET_LIB_HANDLE(libname) dlopen((libname), RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND) #define GET_FUNC(lib, fn) dlsym(lib, (fn)) #elif defined(_WIN64) @@ -68,129 +68,129 @@ static LIB_TYPE cblas_library() { static void (*cblas_sgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const float alpha, - const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc); + const float* a, const int lda, const float* b, const int ldb, + const float beta, float* c, const int ldc); static void (*cblas_dgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const double alpha, - const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc); + const double* a, const int lda, const double* b, const int ldb, + const double beta, double* c, const int ldc); static void (*cblas_cgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc); + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc); static void (*cblas_zgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - 
const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc); + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc); static void (*cblas_ssymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc); + const int m, const int n, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc); static void (*cblas_dsymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc); + const int m, const int n, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc); static void (*cblas_csymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_zsymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_ssyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, 
- const int lda, const float beta, float *c, const int ldc); + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc); static void (*cblas_dsyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, const int ldc); + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc); static void (*cblas_csyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc); static void (*cblas_zsyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc); static void (*cblas_chemm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_zhemm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_cherk_p)(CBLAS_LAYOUT 
layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, const int ldc); + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc); static void (*cblas_zherk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, const int ldc); + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc); static void (*cblas_ssyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc); + const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc); static void (*cblas_dsyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc); + const int n, const int k, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc); static void (*cblas_csyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc); static void (*cblas_zsyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void 
*a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc); static void (*cblas_cher2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const float beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const float beta, + void* c, const int ldc); static void (*cblas_zher2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const double beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const double beta, + void* c, const int ldc); static void (*cblas_strmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb); static void (*cblas_dtrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb); static void (*cblas_ctrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_ztrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, 
CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_strsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb); static void (*cblas_dtrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb); static void (*cblas_ctrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_ztrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void cblas_sgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const float alpha, - const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc) { + const float* a, const int lda, const float* b, const int ldb, + const float beta, float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_sgemm_p == NULL) cblas_sgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const float alpha, const float *a, - const 
int lda, const float *b, const int ldb, const float beta, float *c, + const int m, const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_sgemm"); if (cblas_sgemm_p != NULL) cblas_sgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -199,14 +199,14 @@ static void cblas_sgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL static void cblas_dgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const double alpha, - const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc) { + const double* a, const int lda, const double* b, const int ldb, + const double beta, double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dgemm_p == NULL) cblas_dgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, - const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dgemm"); if (cblas_dgemm_p != NULL) cblas_dgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -214,15 +214,15 @@ static void cblas_dgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_cgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc) { + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const 
int ldc) { if (cblas_library() != NULL) { if (cblas_cgemm_p == NULL) cblas_cgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cgemm"); if (cblas_cgemm_p != NULL) cblas_cgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -230,15 +230,15 @@ static void cblas_cgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_zgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc) { + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zgemm_p == NULL) cblas_zgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zgemm"); if (cblas_zgemm_p != NULL) cblas_zgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -246,15 +246,15 @@ static void cblas_zgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_ssymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, 
const int n, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc) { + const int m, const int n, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssymm_p == NULL) cblas_ssymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *b, const int ldb, const float beta, float *c, + const int n, const float alpha, const float* a, const int lda, + const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssymm"); if (cblas_ssymm_p != NULL) cblas_ssymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); @@ -262,15 +262,15 @@ static void cblas_ssymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLA } static void cblas_dsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc) { + const int m, const int n, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsymm_p == NULL) cblas_dsymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int m, - const int n, const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const int n, const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dsymm"); if (cblas_dsymm_p != NULL) cblas_dsymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); @@ -278,43 +278,43 
@@ static void cblas_dsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLA } static void cblas_csymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csymm_p == NULL) cblas_csymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_csymm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csymm"); if (cblas_csymm_p != NULL) cblas_csymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_zsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsymm_p == NULL) cblas_zsymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsymm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + 
void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsymm"); if (cblas_zsymm_p != NULL) cblas_zsymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_ssyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float beta, float *c, const int ldc) { + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssyrk_p == NULL) cblas_ssyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float beta, float *c, + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssyrk"); if (cblas_ssyrk_p != NULL) cblas_ssyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -322,13 +322,13 @@ static void cblas_ssyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_dsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, const int ldc) { + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsyrk_p == NULL) cblas_dsyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dsyrk"); if (cblas_dsyrk_p != NULL) cblas_dsyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, 
ldc); @@ -336,13 +336,13 @@ static void cblas_dsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_csyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csyrk_p == NULL) cblas_csyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csyrk"); if (cblas_csyrk_p != NULL) cblas_csyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -350,13 +350,13 @@ static void cblas_csyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_zsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsyrk_p == NULL) cblas_zsyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsyrk"); if (cblas_zsyrk_p != NULL) cblas_zsyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -364,43 +364,43 @@ static void cblas_zsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, 
CBLAS_TRAN } static void cblas_chemm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_chemm_p == NULL) cblas_chemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_chemm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_chemm"); if (cblas_chemm_p != NULL) cblas_chemm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_zhemm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zhemm_p == NULL) cblas_zhemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_zhemm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zhemm"); if 
(cblas_zhemm_p != NULL) cblas_zhemm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_cherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, const int ldc) { + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_cherk_p == NULL) cblas_cherk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cherk"); if (cblas_cherk_p != NULL) cblas_cherk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -408,13 +408,13 @@ static void cblas_cherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_zherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, const int ldc) { + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zherk_p == NULL) cblas_zherk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zherk"); if (cblas_zherk_p != NULL) cblas_zherk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -422,15 +422,15 @@ static void cblas_zherk_wrapper(CBLAS_LAYOUT layout, 
CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_ssyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc) { + const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssyr2k_p == NULL) cblas_ssyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *b, const int ldb, const float beta, float *c, + const int k, const float alpha, const float* a, const int lda, + const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssyr2k"); if (cblas_ssyr2k_p != NULL) cblas_ssyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -438,15 +438,15 @@ static void cblas_ssyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_dsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc) { + const int n, const int k, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsyr2k_p == NULL) cblas_dsyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const int k, const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, 
"cblas_dsyr2k"); if (cblas_dsyr2k_p != NULL) cblas_dsyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -454,15 +454,15 @@ static void cblas_dsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_csyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csyr2k_p == NULL) cblas_csyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const void *beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csyr2k"); if (cblas_csyr2k_p != NULL) cblas_csyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -470,15 +470,15 @@ static void cblas_csyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_zsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsyr2k_p == NULL) cblas_zsyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const void *beta, void *c, + 
const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsyr2k"); if (cblas_zsyr2k_p != NULL) cblas_zsyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -486,15 +486,15 @@ static void cblas_zsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_cher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const float beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const float beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_cher2k_p == NULL) cblas_cher2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const float beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const float beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cher2k"); if (cblas_cher2k_p != NULL) cblas_cher2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -502,15 +502,15 @@ static void cblas_cher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_zher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const double beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const double beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zher2k_p == NULL) cblas_zher2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, 
CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const double beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const double beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zher2k"); if (cblas_zher2k_p != NULL) cblas_zher2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -519,14 +519,14 @@ static void cblas_zher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA static void cblas_strmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_strmm_p == NULL) cblas_strmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const float alpha, const float *a, const int lda, - float *b, const int ldb))GET_FUNC(h_libcblas, "cblas_strmm"); + const int n, const float alpha, const float* a, const int lda, + float* b, const int ldb))GET_FUNC(h_libcblas, "cblas_strmm"); if (cblas_strmm_p != NULL) cblas_strmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -534,14 +534,14 @@ static void cblas_strmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_dtrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_dtrmm_p == NULL) cblas_dtrmm_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int 
m, const int n, const double alpha, const double *a, - const int lda, double *b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrmm"); + CBLAS_DIAG diag, const int m, const int n, const double alpha, const double* a, + const int lda, double* b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrmm"); if (cblas_dtrmm_p != NULL) cblas_dtrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -549,14 +549,14 @@ static void cblas_dtrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ctrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ctrmm_p == NULL) cblas_ctrmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrmm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrmm"); if (cblas_ctrmm_p != NULL) cblas_ctrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -564,14 +564,14 @@ static void cblas_ctrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ztrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ztrmm_p == NULL) cblas_ztrmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int 
lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrmm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrmm"); if (cblas_ztrmm_p != NULL) cblas_ztrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -579,14 +579,14 @@ static void cblas_ztrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_strsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_strsm_p == NULL) cblas_strsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const float alpha, const float *a, const int lda, - float *b, const int ldb))GET_FUNC(h_libcblas, "cblas_strsm"); + const int n, const float alpha, const float* a, const int lda, + float* b, const int ldb))GET_FUNC(h_libcblas, "cblas_strsm"); if (cblas_strsm_p != NULL) cblas_strsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -594,14 +594,14 @@ static void cblas_strsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_dtrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_dtrsm_p == NULL) cblas_dtrsm_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int m, const int n, const double alpha, const double *a, - const int lda, double *b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrsm"); + 
CBLAS_DIAG diag, const int m, const int n, const double alpha, const double* a, + const int lda, double* b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrsm"); if (cblas_dtrsm_p != NULL) cblas_dtrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -609,14 +609,14 @@ static void cblas_dtrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ctrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ctrsm_p == NULL) cblas_ctrsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrsm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrsm"); if (cblas_ctrsm_p != NULL) cblas_ctrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -624,14 +624,14 @@ static void cblas_ctrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ztrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ztrsm_p == NULL) cblas_ztrsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrsm"); + const int n, const void* alpha, const void* a, const int lda, + 
void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrsm"); if (cblas_ztrsm_p != NULL) cblas_ztrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -640,213 +640,213 @@ static void cblas_ztrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO /* Level 2 */ static void (*cblas_sgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy); static void (*cblas_cgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_sgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + int kl, int ku, const float alpha, 
const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy); static void (*cblas_dgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + int kl, int ku, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy); static void (*cblas_cgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy); static void (*cblas_zgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy); static void (*cblas_sger_p)(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, - float *a, const int lda); + const float* x, const int incx, const float* y, const int incy, + float* a, const int lda); static void (*cblas_dger_p)(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, const int incy, - double *a, const int lda); -static void (*cblas_cgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda); +static void 
(*cblas_cgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_zgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_zgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_cgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_cgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_zgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_zgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); static void (*cblas_chbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zhbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_chemv_p)(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zhemv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_cher_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda); static void (*cblas_zher_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda); static void (*cblas_cher2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda); static void (*cblas_zher2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda); static void (*cblas_chpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy); + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, 
const int incy); static void (*cblas_zhpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy); + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy); static void (*cblas_chpr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a); + const float alpha, const void* x, const int incx, void* a); static void (*cblas_zhpr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a); + const double alpha, const void* x, const int incx, void* a); static void (*cblas_chpr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a); static void (*cblas_zhpr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a); static void (*cblas_ssbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dsbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const 
double beta, double* y, const int incy); static void (*cblas_ssymv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dsymv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy); static void (*cblas_ssyr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda); static void (*cblas_dsyr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda); static void (*cblas_ssyr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a, const int lda); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a, const int lda); static void (*cblas_dsyr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a, const int lda); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* a, const int lda); static void (*cblas_sspmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, const int 
incx, - const float beta, float *y, const int incy); + const float alpha, const float* a, const float* x, const int incx, + const float beta, float* y, const int incy); static void (*cblas_dspmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, const int incx, - const double beta, double *y, const int incy); + const double alpha, const double* a, const double* x, const int incx, + const double beta, double* y, const int incy); static void (*cblas_sspr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a); + const float alpha, const float* x, const int incx, float* a); static void (*cblas_dspr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a); + const double alpha, const double* x, const int incx, double* a); static void (*cblas_sspr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a); static void (*cblas_dspr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* a); static void (*cblas_stbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx); static void (*cblas_dtbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int 
lda, double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx); static void (*cblas_ctbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_ztbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_stbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx); static void (*cblas_dtbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx); static void (*cblas_ctbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_ztbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const 
int lda, void* x, const int incx); static void (*cblas_stpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx); static void (*cblas_dtpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx); static void (*cblas_ctpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_ztpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_stpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx); static void (*cblas_dtpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx); static void (*cblas_ctpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_ztpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_strmv_p)(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx); static void (*cblas_dtrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx); static void (*cblas_ctrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void (*cblas_ztrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void (*cblas_strsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx); static void (*cblas_dtrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx); static void (*cblas_ctrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void 
(*cblas_ztrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void cblas_sgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int n, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sgemv_p == NULL) cblas_sgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int n, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sgemv"); if (cblas_sgemv_p != NULL) cblas_sgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -854,29 +854,29 @@ static void cblas_sgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_dgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const int n, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dgemv_p == NULL) cblas_dgemv_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const double alpha, const double *a, const int lda, const double *x, const int incx, - const double beta, double *y, const int incy))GET_FUNC(h_libcblas, "cblas_dgemv"); + const 
double alpha, const double* a, const int lda, const double* x, const int incx, + const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dgemv"); if (cblas_dgemv_p != NULL) cblas_dgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } } static void cblas_cgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cgemv_p == NULL) cblas_cgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cgemv"); if (cblas_cgemv_p != NULL) cblas_cgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -884,14 +884,14 @@ static void cblas_cgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_zgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zgemv_p == NULL) cblas_zgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const 
void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zgemv"); if (cblas_zgemv_p != NULL) cblas_zgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -899,15 +899,15 @@ static void cblas_zgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_sgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const float alpha, const float *a, - const int lda, const float *x, const int incx, const float beta, - float *y, const int incy) { + const int n, int kl, int ku, const float alpha, const float* a, + const int lda, const float* x, const int incx, const float beta, + float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sgbmv_p == NULL) cblas_sgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + int kl, int ku, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sgbmv"); if (cblas_sgbmv_p != NULL) cblas_sgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -915,15 +915,15 @@ static void cblas_sgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_dgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const double alpha, const double *a, - const int lda, const double *x, const int incx, const double beta, - double *y, const int incy) { + const int n, int kl, int ku, const double alpha, const double* a, + const int lda, const double* x, const int incx, const double beta, + double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dgbmv_p == NULL) cblas_dgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const double alpha, 
const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + int kl, int ku, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dgbmv"); if (cblas_dgbmv_p != NULL) cblas_dgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -931,15 +931,15 @@ static void cblas_dgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_cgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const void *alpha, const void *a, - const int lda, const void *x, const int incx, const void *beta, - void *y, const int incy) { + const int n, int kl, int ku, const void* alpha, const void* a, + const int lda, const void* x, const int incx, const void* beta, + void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cgbmv_p == NULL) cblas_cgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cgbmv"); if (cblas_cgbmv_p != NULL) cblas_cgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -947,15 +947,15 @@ static void cblas_cgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_zgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const void *alpha, const void *a, - const int lda, const void *x, const int incx, const void *beta, - void *y, const int incy) { + const int n, int kl, int ku, const void* alpha, const void* a, + const int lda, const void* x, const int incx, const void* beta, + void* y, const int incy) { if 
(cblas_library() != NULL) { if (cblas_zgbmv_p == NULL) cblas_zgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zgbmv"); if (cblas_zgbmv_p != NULL) cblas_zgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -963,13 +963,13 @@ static void cblas_zgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_sger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, - float *a, const int lda) { + const float* x, const int incx, const float* y, const int incy, + float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_sger_p == NULL) cblas_sger_p = (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, float *a, + const float* x, const int incx, const float* y, const int incy, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_sger"); if (cblas_sger_p != NULL) cblas_sger_p(layout, m, n, alpha, x, incx, y, incy, a, lda); @@ -977,69 +977,69 @@ static void cblas_sger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, co } static void cblas_dger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, const int incy, - double *a, const int lda) { + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dger_p == NULL) cblas_dger_p = (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, 
const int incy, - double *a, const int lda))GET_FUNC(h_libcblas, "cblas_dger"); + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dger"); if (cblas_dger_p != NULL) cblas_dger_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_cgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_cgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cgerc_p == NULL) cblas_cgerc_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cgerc"); if (cblas_cgerc_p != NULL) cblas_cgerc_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_zgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_zgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zgerc_p == NULL) cblas_zgerc_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, 
"cblas_zgerc"); if (cblas_zgerc_p != NULL) cblas_zgerc_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_cgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_cgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cgeru_p == NULL) cblas_cgeru_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cgeru"); if (cblas_cgeru_p != NULL) cblas_cgeru_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zgeru_p == NULL) cblas_zgeru_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zgeru"); if (cblas_zgeru_p != NULL) cblas_zgeru_p(layout, m, n, alpha, x, incx, y, incy, a, lda); @@ -1047,14 +1047,14 @@ static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int 
m, const int n, c } static void cblas_chbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chbmv_p == NULL) cblas_chbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_chbmv"); if (cblas_chbmv_p != NULL) cblas_chbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1062,14 +1062,14 @@ static void cblas_chbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhbmv_p == NULL) cblas_zhbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhbmv"); if (cblas_zhbmv_p != NULL) cblas_zhbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1077,13 +1077,13 @@ static void 
cblas_zhbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chemv_p == NULL) cblas_chemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_chemv"); if (cblas_chemv_p != NULL) cblas_chemv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1091,13 +1091,13 @@ static void cblas_chemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhemv_p == NULL) cblas_zhemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhemv"); if (cblas_zhemv_p != NULL) cblas_zhemv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1105,12 +1105,12 @@ static void cblas_zhemv_wrapper(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, con } static void cblas_cher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cher_p == NULL) cblas_cher_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cher"); if (cblas_cher_p != NULL) cblas_cher_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1118,12 +1118,12 @@ static void cblas_cher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_zher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zher_p == NULL) cblas_zher_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zher"); if (cblas_zher_p != NULL) cblas_zher_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1131,13 +1131,13 @@ static void cblas_zher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_cher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cher2_p == NULL) cblas_cher2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, 
const int incx, - const void *y, const int incy, void *a, + const void* alpha, const void* x, const int incx, + const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cher2"); if (cblas_cher2_p != NULL) cblas_cher2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1145,13 +1145,13 @@ static void cblas_cher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zher2_p == NULL) cblas_zher2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, - const void *y, const int incy, void *a, + const void* alpha, const void* x, const int incx, + const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zher2"); if (cblas_zher2_p != NULL) cblas_zher2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1159,13 +1159,13 @@ static void cblas_zher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chpmv_p == NULL) cblas_chpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, - const int incx, const void *beta, void *y, + const void* alpha, const void* a, const void* x, + const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, 
"cblas_chpmv"); if (cblas_chpmv_p != NULL) cblas_chpmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1173,13 +1173,13 @@ static void cblas_chpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhpmv_p == NULL) cblas_zhpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, - const int incx, const void *beta, void *y, + const void* alpha, const void* a, const void* x, + const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhpmv"); if (cblas_zhpmv_p != NULL) cblas_zhpmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1187,66 +1187,66 @@ static void cblas_zhpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chpr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a) { + const float alpha, const void* x, const int incx, void* a) { if (cblas_library() != NULL) { if (cblas_chpr_p == NULL) cblas_chpr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, - void *a))GET_FUNC(h_libcblas, "cblas_chpr"); + const float alpha, const void* x, const int incx, + void* a))GET_FUNC(h_libcblas, "cblas_chpr"); if (cblas_chpr_p != NULL) cblas_chpr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_zhpr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a) { + const double alpha, const void* x, const int incx, void* a) { if 
(cblas_library() != NULL) { if (cblas_zhpr_p == NULL) cblas_zhpr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, - void *a))GET_FUNC(h_libcblas, "cblas_zhpr"); + const double alpha, const void* x, const int incx, + void* a))GET_FUNC(h_libcblas, "cblas_zhpr"); if (cblas_zhpr_p != NULL) cblas_zhpr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_chpr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a) { if (cblas_library() != NULL) { if (cblas_chpr2_p == NULL) cblas_chpr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a))GET_FUNC(h_libcblas, "cblas_chpr2"); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a))GET_FUNC(h_libcblas, "cblas_chpr2"); if (cblas_chpr2_p != NULL) cblas_chpr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_zhpr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a) { if (cblas_library() != NULL) { if (cblas_zhpr2_p == NULL) cblas_zhpr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a))GET_FUNC(h_libcblas, "cblas_zhpr2"); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a))GET_FUNC(h_libcblas, "cblas_zhpr2"); if (cblas_zhpr2_p != NULL) cblas_zhpr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static 
void cblas_ssbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int k, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ssbmv_p == NULL) cblas_ssbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int k, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_ssbmv"); if (cblas_ssbmv_p != NULL) cblas_ssbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1254,28 +1254,28 @@ static void cblas_ssbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const int k, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dsbmv_p == NULL) cblas_dsbmv_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const double alpha, const double *a, const int lda, const double *x, const int incx, - const double beta, double *y, const int incy))GET_FUNC(h_libcblas, "cblas_dsbmv"); + const double alpha, const double* a, const int lda, const double* x, const int incx, + const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsbmv"); if (cblas_dsbmv_p != NULL) cblas_dsbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); 
} } static void cblas_ssymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy) { + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ssymv_p == NULL) cblas_ssymv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_ssymv"); if (cblas_ssymv_p != NULL) cblas_ssymv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1283,13 +1283,13 @@ static void cblas_ssymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy) { + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dsymv_p == NULL) cblas_dsymv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsymv"); if (cblas_dsymv_p != NULL) cblas_dsymv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1297,12 +1297,12 @@ static void cblas_dsymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con 
} static void cblas_ssyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_ssyr_p == NULL) cblas_ssyr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_ssyr"); if (cblas_ssyr_p != NULL) cblas_ssyr_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1310,12 +1310,12 @@ static void cblas_ssyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_dsyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dsyr_p == NULL) cblas_dsyr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dsyr"); if (cblas_dsyr_p != NULL) cblas_dsyr_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1323,13 +1323,13 @@ static void cblas_dsyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_ssyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a, const int lda) { + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_ssyr2_p == NULL) cblas_ssyr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, 
const int incx, - const float *y, const int incy, float *a, + const float alpha, const float* x, const int incx, + const float* y, const int incy, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_ssyr2"); if (cblas_ssyr2_p != NULL) cblas_ssyr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1337,13 +1337,13 @@ static void cblas_ssyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a, const int lda) { + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dsyr2_p == NULL) cblas_dsyr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a, + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dsyr2"); if (cblas_dsyr2_p != NULL) cblas_dsyr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1351,13 +1351,13 @@ static void cblas_dsyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_sspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, const int incx, - const float beta, float *y, const int incy) { + const float alpha, const float* a, const float* x, const int incx, + const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sspmv_p == NULL) cblas_sspmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, - const int incx, const float beta, float *y, + const float alpha, const float* a, const float* x, + const int incx, const float beta, 
float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sspmv"); if (cblas_sspmv_p != NULL) cblas_sspmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1365,13 +1365,13 @@ static void cblas_sspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, - const int incx, const double beta, double *y, const int incy) { + const double alpha, const double* a, const double* x, + const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dspmv_p == NULL) cblas_dspmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, - const int incx, const double beta, double *y, + const double alpha, const double* a, const double* x, + const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dspmv"); if (cblas_dspmv_p != NULL) cblas_dspmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1379,65 +1379,65 @@ static void cblas_dspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_sspr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a) { + const float alpha, const float* x, const int incx, float* a) { if (cblas_library() != NULL) { if (cblas_sspr_p == NULL) cblas_sspr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, - float *a))GET_FUNC(h_libcblas, "cblas_sspr"); + const float alpha, const float* x, const int incx, + float* a))GET_FUNC(h_libcblas, "cblas_sspr"); if (cblas_sspr_p != NULL) cblas_sspr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_dspr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int 
incx, double *a) { + const double alpha, const double* x, const int incx, double* a) { if (cblas_library() != NULL) { if (cblas_dspr_p == NULL) cblas_dspr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - double *a))GET_FUNC(h_libcblas, "cblas_dspr"); + const double alpha, const double* x, const int incx, + double* a))GET_FUNC(h_libcblas, "cblas_dspr"); if (cblas_dspr_p != NULL) cblas_dspr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_sspr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a) { + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a) { if (cblas_library() != NULL) { if (cblas_sspr2_p == NULL) cblas_sspr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a))GET_FUNC(h_libcblas, "cblas_sspr2"); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a))GET_FUNC(h_libcblas, "cblas_sspr2"); if (cblas_sspr2_p != NULL) cblas_sspr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_dspr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a) { + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a) { if (cblas_library() != NULL) { if (cblas_dspr2_p == NULL) cblas_dspr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a))GET_FUNC(h_libcblas, "cblas_dspr2"); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* 
a))GET_FUNC(h_libcblas, "cblas_dspr2"); if (cblas_dspr2_p != NULL) cblas_dspr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_stbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stbmv_p == NULL) cblas_stbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const float *a, const int lda, float *x, + const int k, const float* a, const int lda, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stbmv"); if (cblas_stbmv_p != NULL) cblas_stbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1445,13 +1445,13 @@ static void cblas_stbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtbmv_p == NULL) cblas_dtbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const double *a, const int lda, double *x, + const int k, const double* a, const int lda, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtbmv"); if (cblas_dtbmv_p != NULL) cblas_dtbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1459,13 +1459,13 @@ static void cblas_dtbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, 
CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctbmv_p == NULL) cblas_ctbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctbmv"); if (cblas_ctbmv_p != NULL) cblas_ctbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1473,13 +1473,13 @@ static void cblas_ctbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztbmv_p == NULL) cblas_ztbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztbmv"); if (cblas_ztbmv_p != NULL) cblas_ztbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1487,13 +1487,13 @@ static void cblas_ztbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx) { 
if (cblas_library() != NULL) { if (cblas_stbsv_p == NULL) cblas_stbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const float *a, const int lda, float *x, + const int k, const float* a, const int lda, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stbsv"); if (cblas_stbsv_p != NULL) cblas_stbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1501,13 +1501,13 @@ static void cblas_stbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtbsv_p == NULL) cblas_dtbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const double *a, const int lda, double *x, + const int k, const double* a, const int lda, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtbsv"); if (cblas_dtbsv_p != NULL) cblas_dtbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1515,13 +1515,13 @@ static void cblas_dtbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctbsv_p == NULL) cblas_ctbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const 
int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctbsv"); if (cblas_ctbsv_p != NULL) cblas_ctbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1529,13 +1529,13 @@ static void cblas_ctbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztbsv_p == NULL) cblas_ztbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztbsv"); if (cblas_ztbsv_p != NULL) cblas_ztbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1543,13 +1543,13 @@ static void cblas_ztbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stpmv_p == NULL) cblas_stpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stpmv"); if (cblas_stpmv_p != NULL) cblas_stpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1557,13 +1557,13 @@ static void cblas_stpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static 
void cblas_dtpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtpmv_p == NULL) cblas_dtpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtpmv"); if (cblas_dtpmv_p != NULL) cblas_dtpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1571,13 +1571,13 @@ static void cblas_dtpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctpmv_p == NULL) cblas_ctpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctpmv"); if (cblas_ctpmv_p != NULL) cblas_ctpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1585,13 +1585,13 @@ static void cblas_ctpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztpmv_p == NULL) cblas_ztpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void 
*x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztpmv"); if (cblas_ztpmv_p != NULL) cblas_ztpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1599,13 +1599,13 @@ static void cblas_ztpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stpsv_p == NULL) cblas_stpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stpsv"); if (cblas_stpsv_p != NULL) cblas_stpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1613,13 +1613,13 @@ static void cblas_stpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtpsv_p == NULL) cblas_dtpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtpsv"); if (cblas_dtpsv_p != NULL) cblas_dtpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1627,13 +1627,13 @@ static void cblas_dtpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG 
unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctpsv_p == NULL) cblas_ctpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctpsv"); if (cblas_ctpsv_p != NULL) cblas_ctpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1641,13 +1641,13 @@ static void cblas_ctpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztpsv_p == NULL) cblas_ztpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztpsv"); if (cblas_ztpsv_p != NULL) cblas_ztpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1655,41 +1655,41 @@ static void cblas_ztpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_strmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_strmv_p == NULL) cblas_strmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx))GET_FUNC(h_libcblas, 
"cblas_strmv"); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx))GET_FUNC(h_libcblas, "cblas_strmv"); if (cblas_strmv_p != NULL) cblas_strmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_dtrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtrmv_p == NULL) cblas_dtrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrmv"); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrmv"); if (cblas_dtrmv_p != NULL) cblas_dtrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_ctrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctrmv_p == NULL) cblas_ctrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctrmv"); if (cblas_ctrmv_p != NULL) cblas_ctrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1697,13 +1697,13 @@ static void cblas_ctrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztrmv_wrapper(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztrmv_p == NULL) cblas_ztrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztrmv"); if (cblas_ztrmv_p != NULL) cblas_ztrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1711,41 +1711,41 @@ static void cblas_ztrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_strsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_strsv_p == NULL) cblas_strsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx))GET_FUNC(h_libcblas, "cblas_strsv"); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx))GET_FUNC(h_libcblas, "cblas_strsv"); if (cblas_strsv_p != NULL) cblas_strsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_dtrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtrsv_p == NULL) cblas_dtrsv_p = 
(void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrsv"); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrsv"); if (cblas_dtrsv_p != NULL) cblas_dtrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_ctrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctrsv_p == NULL) cblas_ctrsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctrsv"); if (cblas_ctrsv_p != NULL) cblas_ctrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1753,13 +1753,13 @@ static void cblas_ctrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztrsv_p == NULL) cblas_ztrsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztrsv"); if (cblas_ztrsv_p != NULL) 
cblas_ztrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1768,81 +1768,81 @@ static void cblas_ztrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL /* Level 1 */ -static float (*cblas_sasum_p)(const int n, const float *x, const int incx); -static double (*cblas_dasum_p)(const int n, const double *x, const int incx); -static float (*cblas_scasum_p)(const int n, const void *x, const int incx); -static double (*cblas_dzasum_p)(const int n, const void *x, const int incx); -static void (*cblas_saxpy_p)(const int n, const float alpha, const float *x, const int incx, - float *y, const int incy); -static void (*cblas_daxpy_p)(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy); -static void (*cblas_caxpy_p)(const int n, const void *alpha, const void *x, const int incx, void *y, +static float (*cblas_sasum_p)(const int n, const float* x, const int incx); +static double (*cblas_dasum_p)(const int n, const double* x, const int incx); +static float (*cblas_scasum_p)(const int n, const void* x, const int incx); +static double (*cblas_dzasum_p)(const int n, const void* x, const int incx); +static void (*cblas_saxpy_p)(const int n, const float alpha, const float* x, const int incx, + float* y, const int incy); +static void (*cblas_daxpy_p)(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy); +static void (*cblas_caxpy_p)(const int n, const void* alpha, const void* x, const int incx, void* y, const int incy); -static void (*cblas_zaxpy_p)(const int n, const void *alpha, const void *x, const int incx, void *y, +static void (*cblas_zaxpy_p)(const int n, const void* alpha, const void* x, const int incx, void* y, const int incy); -static void (*cblas_scopy_p)(const int n, const float *x, const int incx, float *y, const int incy); -static void (*cblas_dcopy_p)(const int n, const double *x, const int incx, double *y, +static void (*cblas_scopy_p)(const int n, const float* 
x, const int incx, float* y, const int incy); +static void (*cblas_dcopy_p)(const int n, const double* x, const int incx, double* y, const int incy); -static void (*cblas_ccopy_p)(const int n, const void *x, const int incx, void *y, const int incy); -static void (*cblas_zcopy_p)(const int n, const void *x, const int incx, void *y, const int incy); -static float (*cblas_sdot_p)(const int n, const float *x, const int incx, const float *y, +static void (*cblas_ccopy_p)(const int n, const void* x, const int incx, void* y, const int incy); +static void (*cblas_zcopy_p)(const int n, const void* x, const int incx, void* y, const int incy); +static float (*cblas_sdot_p)(const int n, const float* x, const int incx, const float* y, const int incy); -static double (*cblas_ddot_p)(const int n, const double *x, const int incx, const double *y, +static double (*cblas_ddot_p)(const int n, const double* x, const int incx, const double* y, const int incy); -static double (*cblas_dsdot_p)(const int n, const float *x, const int incx, const float *y, +static double (*cblas_dsdot_p)(const int n, const float* x, const int incx, const float* y, const int incy); -static float (*cblas_sdsdot_p)(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy); -static float (*cblas_snrm2_p)(const int n, const float *x, const int incx); -static double (*cblas_dnrm2_p)(const int n, const double *x, const int incx); -static float (*cblas_scnrm2_p)(const int n, const void *x, const int incx); -static double (*cblas_dznrm2_p)(const int n, const void *x, const int incx); -static void (*cblas_srot_p)(const int n, float *x, const int incx, float *y, const int incy, +static float (*cblas_sdsdot_p)(const int n, const float sb, const float* x, const int incx, + const float* y, const int incy); +static float (*cblas_snrm2_p)(const int n, const float* x, const int incx); +static double (*cblas_dnrm2_p)(const int n, const double* x, const int incx); +static float 
(*cblas_scnrm2_p)(const int n, const void* x, const int incx); +static double (*cblas_dznrm2_p)(const int n, const void* x, const int incx); +static void (*cblas_srot_p)(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s); -static void (*cblas_drot_p)(const int n, double *x, const int incx, double *y, const int incy, +static void (*cblas_drot_p)(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s); -static void (*csrot_p)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s); -static void (*zdrot_p)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s); -static void (*cblas_srotg_p)(float *a, float *b, float *c, float *s); -static void (*cblas_drotg_p)(double *a, double *b, double *c, double *s); -static void (*crotg_p)(void *a, void *b, float *c, void *s); -static void (*zrotg_p)(void *a, void *b, double *c, void *s); -static void (*cblas_srotm_p)(const int n, float *x, const int incx, float *y, const int incy, - const float *param); -static void (*cblas_drotm_p)(const int n, double *x, const int incx, double *y, const int incy, - const double *param); -static void (*cblas_srotmg_p)(float *d1, float *d2, float *x1, float y1, float *param); -static void (*cblas_drotmg_p)(double *d1, double *d2, double *x1, double y1, double *param); -static void (*cblas_sscal_p)(const int n, const float alpha, float *x, const int incx); -static void (*cblas_dscal_p)(const int n, const double alpha, double *x, const int incx); -static void (*cblas_cscal_p)(const int n, const void *alpha, void *x, const int incx); -static void (*cblas_zscal_p)(const int n, const void *alpha, void *x, const int incx); -static void (*cblas_csscal_p)(const int n, const float alpha, void *x, const int incx); -static void (*cblas_zdscal_p)(const int n, const double alpha, void *x, const int incx); -static void 
(*cblas_sswap_p)(const int n, float *x, const int incx, float *y, const int incy); -static void (*cblas_dswap_p)(const int n, double *x, const int incx, double *y, const int incy); -static void (*cblas_cswap_p)(const int n, void *x, const int incx, void *y, const int incy); -static void (*cblas_zswap_p)(const int n, void *x, const int incx, void *y, const int incy); -static void (*cblas_cdotc_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_zdotc_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_cdotu_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_zdotu_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static int (*cblas_isamax_p)(const int n, const float *x, const int incx); -static int (*cblas_idamax_p)(const int n, const double *x, const int incx); -static int (*cblas_icamax_p)(const int n, const void *x, const int incx); -static int (*cblas_izamax_p)(const int n, const void *x, const int incx); - -static float cblas_sasum_wrapper(const int n, const float *x, const int incx) { +static void (*csrot_p)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s); +static void (*zdrot_p)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s); +static void (*cblas_srotg_p)(float* a, float* b, float* c, float* s); +static void (*cblas_drotg_p)(double* a, double* b, double* c, double* s); +static void (*crotg_p)(void* a, void* b, float* c, void* s); +static void (*zrotg_p)(void* a, void* b, double* c, void* s); +static void (*cblas_srotm_p)(const int n, float* x, const int incx, float* y, const int incy, + const float* param); +static void (*cblas_drotm_p)(const int n, double* x, const int incx, double* y, const int incy, + const 
double* param); +static void (*cblas_srotmg_p)(float* d1, float* d2, float* x1, float y1, float* param); +static void (*cblas_drotmg_p)(double* d1, double* d2, double* x1, double y1, double* param); +static void (*cblas_sscal_p)(const int n, const float alpha, float* x, const int incx); +static void (*cblas_dscal_p)(const int n, const double alpha, double* x, const int incx); +static void (*cblas_cscal_p)(const int n, const void* alpha, void* x, const int incx); +static void (*cblas_zscal_p)(const int n, const void* alpha, void* x, const int incx); +static void (*cblas_csscal_p)(const int n, const float alpha, void* x, const int incx); +static void (*cblas_zdscal_p)(const int n, const double alpha, void* x, const int incx); +static void (*cblas_sswap_p)(const int n, float* x, const int incx, float* y, const int incy); +static void (*cblas_dswap_p)(const int n, double* x, const int incx, double* y, const int incy); +static void (*cblas_cswap_p)(const int n, void* x, const int incx, void* y, const int incy); +static void (*cblas_zswap_p)(const int n, void* x, const int incx, void* y, const int incy); +static void (*cblas_cdotc_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_zdotc_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_cdotu_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_zdotu_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static int (*cblas_isamax_p)(const int n, const float* x, const int incx); +static int (*cblas_idamax_p)(const int n, const double* x, const int incx); +static int (*cblas_icamax_p)(const int n, const void* x, const int incx); +static int (*cblas_izamax_p)(const int n, const void* x, const int incx); + +static float cblas_sasum_wrapper(const int n, const float* x, const int incx) { 
float sasum_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sasum_p == NULL) - cblas_sasum_p = (float (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_sasum_p = (float (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_sasum"); if (cblas_sasum_p != NULL) sasum_res = cblas_sasum_p(n, x, incx); @@ -1850,11 +1850,11 @@ static float cblas_sasum_wrapper(const int n, const float *x, const int incx) { return sasum_res; } -static double cblas_dasum_wrapper(const int n, const double *x, const int incx) { +static double cblas_dasum_wrapper(const int n, const double* x, const int incx) { double dasum_res = 0.0; if (cblas_library() != NULL) { if (cblas_dasum_p == NULL) - cblas_dasum_p = (double (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_dasum_p = (double (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_dasum"); if (cblas_dasum_p != NULL) dasum_res = cblas_dasum_p(n, x, incx); @@ -1862,11 +1862,11 @@ static double cblas_dasum_wrapper(const int n, const double *x, const int incx) return dasum_res; } -static float cblas_scasum_wrapper(const int n, const void *x, const int incx) { +static float cblas_scasum_wrapper(const int n, const void* x, const int incx) { float scasum_res = 0.0f; if (cblas_library() != NULL) { if (cblas_scasum_p == NULL) - cblas_scasum_p = (float (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_scasum_p = (float (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_scasum"); if (cblas_scasum_p != NULL) scasum_res = cblas_scasum_p(n, x, incx); @@ -1874,11 +1874,11 @@ static float cblas_scasum_wrapper(const int n, const void *x, const int incx) { return scasum_res; } -static double cblas_dzasum_wrapper(const int n, const void *x, const int incx) { +static double cblas_dzasum_wrapper(const int n, const void* x, const int incx) { double dzasum_res = 0.0; if (cblas_library() != NULL) { if (cblas_dzasum_p == NULL) - 
cblas_dzasum_p = (double (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_dzasum_p = (double (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_dzasum"); if (cblas_dzasum_p != NULL) dzasum_res = cblas_dzasum_p(n, x, incx); @@ -1886,102 +1886,102 @@ static double cblas_dzasum_wrapper(const int n, const void *x, const int incx) { return dzasum_res; } -static void cblas_saxpy_wrapper(const int n, const float alpha, const float *x, const int incx, - float *y, const int incy) { +static void cblas_saxpy_wrapper(const int n, const float alpha, const float* x, const int incx, + float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_saxpy_p == NULL) cblas_saxpy_p = - (void (*)(const int n, const float alpha, const float *x, const int incx, float *y, + (void (*)(const int n, const float alpha, const float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_saxpy"); if (cblas_saxpy_p != NULL) cblas_saxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_daxpy_wrapper(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy) { +static void cblas_daxpy_wrapper(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_daxpy_p == NULL) cblas_daxpy_p = - (void (*)(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy))GET_FUNC(h_libcblas, "cblas_daxpy"); + (void (*)(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy))GET_FUNC(h_libcblas, "cblas_daxpy"); if (cblas_daxpy_p != NULL) cblas_daxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_caxpy_wrapper(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy) { +static void cblas_caxpy_wrapper(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy) { if (cblas_library() != NULL) { 
if (cblas_caxpy_p == NULL) - cblas_caxpy_p = (void (*)(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy))GET_FUNC(h_libcblas, "cblas_caxpy"); + cblas_caxpy_p = (void (*)(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy))GET_FUNC(h_libcblas, "cblas_caxpy"); if (cblas_caxpy_p != NULL) cblas_caxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_zaxpy_wrapper(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy) { +static void cblas_zaxpy_wrapper(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zaxpy_p == NULL) - cblas_zaxpy_p = (void (*)(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy))GET_FUNC(h_libcblas, "cblas_zaxpy"); + cblas_zaxpy_p = (void (*)(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zaxpy"); if (cblas_zaxpy_p != NULL) cblas_zaxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_scopy_wrapper(const int n, const float *x, const int incx, float *y, +static void cblas_scopy_wrapper(const int n, const float* x, const int incx, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_scopy_p == NULL) - cblas_scopy_p = (void (*)(const int n, const float *x, const int incx, float *y, + cblas_scopy_p = (void (*)(const int n, const float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_scopy"); if (cblas_scopy_p != NULL) cblas_scopy_p(n, x, incx, y, incy); } } -static void cblas_dcopy_wrapper(const int n, const double *x, const int incx, double *y, +static void cblas_dcopy_wrapper(const int n, const double* x, const int incx, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dcopy_p == NULL) - cblas_dcopy_p = (void (*)(const int n, const double *x, const int incx, double *y, + cblas_dcopy_p = 
(void (*)(const int n, const double* x, const int incx, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dcopy"); if (cblas_dcopy_p != NULL) cblas_dcopy_p(n, x, incx, y, incy); } } -static void cblas_ccopy_wrapper(const int n, const void *x, const int incx, void *y, +static void cblas_ccopy_wrapper(const int n, const void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ccopy_p == NULL) - cblas_ccopy_p = (void (*)(const int n, const void *x, const int incx, void *y, + cblas_ccopy_p = (void (*)(const int n, const void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_ccopy"); if (cblas_ccopy_p != NULL) cblas_ccopy_p(n, x, incx, y, incy); } } -static void cblas_zcopy_wrapper(const int n, const void *x, const int incx, void *y, +static void cblas_zcopy_wrapper(const int n, const void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zcopy_p == NULL) - cblas_zcopy_p = (void (*)(const int n, const void *x, const int incx, void *y, + cblas_zcopy_p = (void (*)(const int n, const void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zcopy"); if (cblas_zcopy_p != NULL) cblas_zcopy_p(n, x, incx, y, incy); } } -static float cblas_sdot_wrapper(const int n, const float *x, const int incx, const float *y, +static float cblas_sdot_wrapper(const int n, const float* x, const int incx, const float* y, const int incy) { float sdot_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sdot_p == NULL) - cblas_sdot_p = (float (*)(const int n, const float *x, const int incx, const float *y, + cblas_sdot_p = (float (*)(const int n, const float* x, const int incx, const float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sdot"); if (cblas_sdot_p != NULL) sdot_res = cblas_sdot_p(n, x, incx, y, incy); @@ -1989,13 +1989,13 @@ static float cblas_sdot_wrapper(const int n, const float *x, const int incx, con return sdot_res; } -static double cblas_ddot_wrapper(const 
int n, const double *x, const int incx, const double *y, +static double cblas_ddot_wrapper(const int n, const double* x, const int incx, const double* y, const int incy) { double ddot_res = 0.0; if (cblas_library() != NULL) { if (cblas_ddot_p == NULL) cblas_ddot_p = - (double (*)(const int n, const double *x, const int incx, const double *y, + (double (*)(const int n, const double* x, const int incx, const double* y, const int incy))GET_FUNC(h_libcblas, "cblas_ddot"); if (cblas_ddot_p != NULL) ddot_res = cblas_ddot_p(n, x, incx, y, incy); @@ -2003,12 +2003,12 @@ static double cblas_ddot_wrapper(const int n, const double *x, const int incx, c return ddot_res; } -static double cblas_dsdot_wrapper(const int n, const float *x, const int incx, const float *y, +static double cblas_dsdot_wrapper(const int n, const float* x, const int incx, const float* y, const int incy) { double dsdot_res = 0.0; if (cblas_library() != NULL) { if (cblas_dsdot_p == NULL) - cblas_dsdot_p = (double (*)(const int n, const float *x, const int incx, const float *y, + cblas_dsdot_p = (double (*)(const int n, const float* x, const int incx, const float* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsdot"); if (cblas_dsdot_p != NULL) dsdot_res = cblas_dsdot_p(n, x, incx, y, incy); @@ -2016,25 +2016,25 @@ static double cblas_dsdot_wrapper(const int n, const float *x, const int incx, c return dsdot_res; } -static float cblas_sdsdot_wrapper(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy) { +static float cblas_sdsdot_wrapper(const int n, const float sb, const float* x, const int incx, + const float* y, const int incy) { float sdsdot_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sdsdot_p == NULL) cblas_sdsdot_p = - (float (*)(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy))GET_FUNC(h_libcblas, "cblas_sdsdot"); + (float (*)(const int n, const float sb, const float* x, const int incx, + const float* y, 
const int incy))GET_FUNC(h_libcblas, "cblas_sdsdot"); if (cblas_sdsdot_p != NULL) sdsdot_res = cblas_sdsdot_p(n, sb, x, incx, y, incy); } return sdsdot_res; } -static float cblas_snrm2_wrapper(const int n, const float *x, const int incx) { +static float cblas_snrm2_wrapper(const int n, const float* x, const int incx) { float snrm2_res = 0.0f; if (cblas_library() != NULL) { if (cblas_snrm2_p == NULL) - cblas_snrm2_p = (float (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_snrm2_p = (float (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_snrm2"); if (cblas_snrm2_p != NULL) snrm2_res = cblas_snrm2_p(n, x, incx); @@ -2042,11 +2042,11 @@ static float cblas_snrm2_wrapper(const int n, const float *x, const int incx) { return snrm2_res; } -static double cblas_dnrm2_wrapper(const int n, const double *x, const int incx) { +static double cblas_dnrm2_wrapper(const int n, const double* x, const int incx) { double dnrm2_res = 0.0; if (cblas_library() != NULL) { if (cblas_dnrm2_p == NULL) - cblas_dnrm2_p = (double (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_dnrm2_p = (double (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_dnrm2"); if (cblas_dnrm2_p != NULL) dnrm2_res = cblas_dnrm2_p(n, x, incx); @@ -2054,11 +2054,11 @@ static double cblas_dnrm2_wrapper(const int n, const double *x, const int incx) return dnrm2_res; } -static float cblas_scnrm2_wrapper(const int n, const void *x, const int incx) { +static float cblas_scnrm2_wrapper(const int n, const void* x, const int incx) { float scnrm2_res = 0.0f; if (cblas_library() != NULL) { if (cblas_scnrm2_p == NULL) - cblas_scnrm2_p = (float (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_scnrm2_p = (float (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_scnrm2"); if (cblas_scnrm2_p != NULL) scnrm2_res = cblas_scnrm2_p(n, x, incx); @@ -2066,11 +2066,11 @@ static float 
cblas_scnrm2_wrapper(const int n, const void *x, const int incx) { return scnrm2_res; } -static double cblas_dznrm2_wrapper(const int n, const void *x, const int incx) { +static double cblas_dznrm2_wrapper(const int n, const void* x, const int incx) { double dznrm2_res = 0.0; if (cblas_library() != NULL) { if (cblas_dznrm2_p == NULL) - cblas_dznrm2_p = (double (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_dznrm2_p = (double (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_dznrm2"); if (cblas_dznrm2_p != NULL) dznrm2_res = cblas_dznrm2_p(n, x, incx); @@ -2078,297 +2078,297 @@ static double cblas_dznrm2_wrapper(const int n, const void *x, const int incx) { return dznrm2_res; } -static void cblas_srot_wrapper(const int n, float *x, const int incx, float *y, const int incy, +static void cblas_srot_wrapper(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s) { if (cblas_library() != NULL) { if (cblas_srot_p == NULL) cblas_srot_p = - (void (*)(const int n, float *x, const int incx, float *y, const int incy, + (void (*)(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s))GET_FUNC(h_libcblas, "cblas_srot"); if (cblas_srot_p != NULL) cblas_srot_p(n, x, incx, y, incy, c, s); } } -static void cblas_drot_wrapper(const int n, double *x, const int incx, double *y, const int incy, +static void cblas_drot_wrapper(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s) { if (cblas_library() != NULL) { if (cblas_drot_p == NULL) cblas_drot_p = - (void (*)(const int n, double *x, const int incx, double *y, const int incy, + (void (*)(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s))GET_FUNC(h_libcblas, "cblas_drot"); if (cblas_drot_p != NULL) cblas_drot_p(n, x, incx, y, incy, c, s); } } -static void csrot_wrapper(const int *n, void *x, const int *incx, void *y, 
const int *incy, - const float *c, const float *s) { +static void csrot_wrapper(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s) { if (blas_library() != NULL) { if (csrot_p == NULL) - csrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s))GET_FUNC(h_libblas, "csrot_"); + csrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s))GET_FUNC(h_libblas, "csrot_"); if (csrot_p == NULL) - csrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s))GET_FUNC(h_libblas, "CSROT"); + csrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s))GET_FUNC(h_libblas, "CSROT"); if (csrot_p != NULL) csrot_p(n, x, incx, y, incy, c, s); } } -static void zdrot_wrapper(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s) { +static void zdrot_wrapper(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s) { if (blas_library() != NULL) { if (zdrot_p == NULL) - zdrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s))GET_FUNC(h_libblas, "zdrot_"); + zdrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s))GET_FUNC(h_libblas, "zdrot_"); if (zdrot_p == NULL) - zdrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s))GET_FUNC(h_libblas, "ZDROT"); + zdrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s))GET_FUNC(h_libblas, "ZDROT"); if (zdrot_p != NULL) zdrot_p(n, x, incx, y, incy, c, s); } } -static void cblas_srotg_wrapper(float *a, float *b, float 
*c, float *s) { +static void cblas_srotg_wrapper(float* a, float* b, float* c, float* s) { if (cblas_library() != NULL) { if (cblas_srotg_p == NULL) - cblas_srotg_p = (void (*)(float *a, float *b, float *c, float *s))GET_FUNC( + cblas_srotg_p = (void (*)(float* a, float* b, float* c, float* s))GET_FUNC( h_libcblas, "cblas_srotg"); if (cblas_srotg_p != NULL) cblas_srotg_p(a, b, c, s); } } -static void cblas_drotg_wrapper(double *a, double *b, double *c, double *s) { +static void cblas_drotg_wrapper(double* a, double* b, double* c, double* s) { if (cblas_library() != NULL) { if (cblas_drotg_p == NULL) - cblas_drotg_p = (void (*)(double *a, double *b, double *c, double *s))GET_FUNC( + cblas_drotg_p = (void (*)(double* a, double* b, double* c, double* s))GET_FUNC( h_libcblas, "cblas_drotg"); if (cblas_drotg_p != NULL) cblas_drotg_p(a, b, c, s); } } -static void crotg_wrapper(void *a, void *b, float *c, void *s) { +static void crotg_wrapper(void* a, void* b, float* c, void* s) { if (blas_library() != NULL) { if (crotg_p == NULL) - crotg_p = (void (*)(void *a, void *b, float *c, void *s))GET_FUNC(h_libblas, "crotg_"); + crotg_p = (void (*)(void* a, void* b, float* c, void* s))GET_FUNC(h_libblas, "crotg_"); if (crotg_p == NULL) - crotg_p = (void (*)(void *a, void *b, float *c, void *s))GET_FUNC(h_libblas, "CROTG"); + crotg_p = (void (*)(void* a, void* b, float* c, void* s))GET_FUNC(h_libblas, "CROTG"); if (crotg_p != NULL) crotg_p(a, b, c, s); } } -static void zrotg_wrapper(void *a, void *b, double *c, void *s) { +static void zrotg_wrapper(void* a, void* b, double* c, void* s) { if (blas_library() != NULL) { if (zrotg_p == NULL) - zrotg_p = (void (*)(void *a, void *b, double *c, void *s))GET_FUNC(h_libblas, "zrotg_"); + zrotg_p = (void (*)(void* a, void* b, double* c, void* s))GET_FUNC(h_libblas, "zrotg_"); if (zrotg_p == NULL) - zrotg_p = (void (*)(void *a, void *b, double *c, void *s))GET_FUNC(h_libblas, "ZROTG"); + zrotg_p = (void (*)(void* a, void* b, double* c, void* 
s))GET_FUNC(h_libblas, "ZROTG"); if (zrotg_p != NULL) zrotg_p(a, b, c, s); } } -static void cblas_srotm_wrapper(const int n, float *x, const int incx, float *y, const int incy, - const float *param) { +static void cblas_srotm_wrapper(const int n, float* x, const int incx, float* y, const int incy, + const float* param) { if (cblas_library() != NULL) { if (cblas_srotm_p == NULL) cblas_srotm_p = - (void (*)(const int n, float *x, const int incx, float *y, const int incy, - const float *param))GET_FUNC(h_libcblas, "cblas_srotm"); + (void (*)(const int n, float* x, const int incx, float* y, const int incy, + const float* param))GET_FUNC(h_libcblas, "cblas_srotm"); if (cblas_srotm_p != NULL) cblas_srotm_p(n, x, incx, y, incy, param); } } -static void cblas_drotm_wrapper(const int n, double *x, const int incx, double *y, const int incy, - const double *param) { +static void cblas_drotm_wrapper(const int n, double* x, const int incx, double* y, const int incy, + const double* param) { if (cblas_library() != NULL) { if (cblas_drotm_p == NULL) cblas_drotm_p = - (void (*)(const int n, double *x, const int incx, double *y, const int incy, - const double *param))GET_FUNC(h_libcblas, "cblas_drotm"); + (void (*)(const int n, double* x, const int incx, double* y, const int incy, + const double* param))GET_FUNC(h_libcblas, "cblas_drotm"); if (cblas_drotm_p != NULL) cblas_drotm_p(n, x, incx, y, incy, param); } } -static void cblas_srotmg_wrapper(float *d1, float *d2, float *x1, float y1, float *param) { +static void cblas_srotmg_wrapper(float* d1, float* d2, float* x1, float y1, float* param) { if (cblas_library() != NULL) { if (cblas_srotmg_p == NULL) - cblas_srotmg_p = (void (*)(float *d1, float *d2, float *x1, float y1, - float *param))GET_FUNC(h_libcblas, "cblas_srotmg"); + cblas_srotmg_p = (void (*)(float* d1, float* d2, float* x1, float y1, + float* param))GET_FUNC(h_libcblas, "cblas_srotmg"); if (cblas_srotmg_p != NULL) cblas_srotmg_p(d1, d2, x1, y1, param); } } -static void 
cblas_drotmg_wrapper(double *d1, double *d2, double *x1, double y1, double *param) { +static void cblas_drotmg_wrapper(double* d1, double* d2, double* x1, double y1, double* param) { if (cblas_library() != NULL) { if (cblas_drotmg_p == NULL) - cblas_drotmg_p = (void (*)(double *d1, double *d2, double *x1, double y1, - double *param))GET_FUNC(h_libcblas, "cblas_drotmg"); + cblas_drotmg_p = (void (*)(double* d1, double* d2, double* x1, double y1, + double* param))GET_FUNC(h_libcblas, "cblas_drotmg"); if (cblas_drotmg_p != NULL) cblas_drotmg_p(d1, d2, x1, y1, param); } } -static void cblas_sscal_wrapper(const int n, const float alpha, float *x, const int incx) { +static void cblas_sscal_wrapper(const int n, const float alpha, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_sscal_p == NULL) - cblas_sscal_p = (void (*)(const int n, const float alpha, float *x, + cblas_sscal_p = (void (*)(const int n, const float alpha, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_sscal"); if (cblas_sscal_p != NULL) cblas_sscal_p(n, alpha, x, incx); } } -static void cblas_dscal_wrapper(const int n, const double alpha, double *x, const int incx) { +static void cblas_dscal_wrapper(const int n, const double alpha, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dscal_p == NULL) - cblas_dscal_p = (void (*)(const int n, const double alpha, double *x, + cblas_dscal_p = (void (*)(const int n, const double alpha, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dscal"); if (cblas_dscal_p != NULL) cblas_dscal_p(n, alpha, x, incx); } } -static void cblas_cscal_wrapper(const int n, const void *alpha, void *x, const int incx) { +static void cblas_cscal_wrapper(const int n, const void* alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_cscal_p == NULL) - cblas_cscal_p = (void (*)(const int n, const void *alpha, void *x, + cblas_cscal_p = (void (*)(const int n, const void* alpha, void* x, const int 
incx))GET_FUNC(h_libcblas, "cblas_cscal"); if (cblas_cscal_p != NULL) cblas_cscal_p(n, alpha, x, incx); } } -static void cblas_zscal_wrapper(const int n, const void *alpha, void *x, const int incx) { +static void cblas_zscal_wrapper(const int n, const void* alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_zscal_p == NULL) - cblas_zscal_p = (void (*)(const int n, const void *alpha, void *x, + cblas_zscal_p = (void (*)(const int n, const void* alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_zscal"); if (cblas_zscal_p != NULL) cblas_zscal_p(n, alpha, x, incx); } } -static void cblas_csscal_wrapper(const int n, const float alpha, void *x, const int incx) { +static void cblas_csscal_wrapper(const int n, const float alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_csscal_p == NULL) - cblas_csscal_p = (void (*)(const int n, const float alpha, void *x, + cblas_csscal_p = (void (*)(const int n, const float alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_csscal"); if (cblas_csscal_p != NULL) cblas_csscal_p(n, alpha, x, incx); } } -static void cblas_zdscal_wrapper(const int n, const double alpha, void *x, const int incx) { +static void cblas_zdscal_wrapper(const int n, const double alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_zdscal_p == NULL) - cblas_zdscal_p = (void (*)(const int n, const double alpha, void *x, + cblas_zdscal_p = (void (*)(const int n, const double alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_zdscal"); if (cblas_zdscal_p != NULL) cblas_zdscal_p(n, alpha, x, incx); } } -static void cblas_sswap_wrapper(const int n, float *x, const int incx, float *y, const int incy) { +static void cblas_sswap_wrapper(const int n, float* x, const int incx, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sswap_p == NULL) - cblas_sswap_p = (void (*)(const int n, float *x, const int incx, float *y, + cblas_sswap_p = (void (*)(const int n, 
float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sswap"); if (cblas_sswap_p != NULL) cblas_sswap_p(n, x, incx, y, incy); } } -static void cblas_dswap_wrapper(const int n, double *x, const int incx, double *y, const int incy) { +static void cblas_dswap_wrapper(const int n, double* x, const int incx, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dswap_p == NULL) - cblas_dswap_p = (void (*)(const int n, double *x, const int incx, double *y, + cblas_dswap_p = (void (*)(const int n, double* x, const int incx, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dswap"); if (cblas_dswap_p != NULL) cblas_dswap_p(n, x, incx, y, incy); } } -static void cblas_cswap_wrapper(const int n, void *x, const int incx, void *y, const int incy) { +static void cblas_cswap_wrapper(const int n, void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cswap_p == NULL) - cblas_cswap_p = (void (*)(const int n, void *x, const int incx, void *y, + cblas_cswap_p = (void (*)(const int n, void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cswap"); if (cblas_cswap_p != NULL) cblas_cswap_p(n, x, incx, y, incy); } } -static void cblas_zswap_wrapper(const int n, void *x, const int incx, void *y, const int incy) { +static void cblas_zswap_wrapper(const int n, void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zswap_p == NULL) - cblas_zswap_p = (void (*)(const int n, void *x, const int incx, void *y, + cblas_zswap_p = (void (*)(const int n, void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zswap"); if (cblas_zswap_p != NULL) cblas_zswap_p(n, x, incx, y, incy); } } -static void cblas_cdotc_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_cdotc_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { 
if (cblas_library() != NULL) { if (cblas_cdotc_sub_p == NULL) cblas_cdotc_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_cdotc_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_cdotc_sub"); if (cblas_cdotc_sub_p != NULL) cblas_cdotc_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_zdotc_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_zdotc_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_zdotc_sub_p == NULL) cblas_zdotc_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_zdotc_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_zdotc_sub"); if (cblas_zdotc_sub_p != NULL) cblas_zdotc_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_cdotu_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_cdotu_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_cdotu_sub_p == NULL) cblas_cdotu_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_cdotu_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_cdotu_sub"); if (cblas_cdotu_sub_p != NULL) cblas_cdotu_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_zdotu_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void 
cblas_zdotu_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_zdotu_sub_p == NULL) cblas_zdotu_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_zdotu_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_zdotu_sub"); if (cblas_zdotu_sub_p != NULL) cblas_zdotu_sub_p(n, x, incx, y, incy, pres); } } -static int cblas_isamax_wrapper(const int n, const float *x, const int incx) { +static int cblas_isamax_wrapper(const int n, const float* x, const int incx) { int isamax_res = 0; if (cblas_library() != NULL) { if (cblas_isamax_p == NULL) - cblas_isamax_p = (int (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_isamax_p = (int (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_isamax"); if (cblas_isamax_p != NULL) isamax_res = cblas_isamax_p(n, x, incx); @@ -2376,11 +2376,11 @@ static int cblas_isamax_wrapper(const int n, const float *x, const int incx) { return isamax_res; } -static int cblas_idamax_wrapper(const int n, const double *x, const int incx) { +static int cblas_idamax_wrapper(const int n, const double* x, const int incx) { int idamax_res = 0; if (cblas_library() != NULL) { if (cblas_idamax_p == NULL) - cblas_idamax_p = (int (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_idamax_p = (int (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_idamax"); if (cblas_idamax_p != NULL) idamax_res = cblas_idamax_p(n, x, incx); @@ -2388,11 +2388,11 @@ static int cblas_idamax_wrapper(const int n, const double *x, const int incx) { return idamax_res; } -static int cblas_icamax_wrapper(const int n, const void *x, const int incx) { +static int cblas_icamax_wrapper(const int n, const void* x, const int incx) { int icamax_res = 0; 
if (cblas_library() != NULL) { if (cblas_icamax_p == NULL) - cblas_icamax_p = (int (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_icamax_p = (int (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_icamax"); if (cblas_icamax_p != NULL) icamax_res = cblas_icamax_p(n, x, incx); @@ -2400,11 +2400,11 @@ static int cblas_icamax_wrapper(const int n, const void *x, const int incx) { return icamax_res; } -static int cblas_izamax_wrapper(const int n, const void *x, const int incx) { +static int cblas_izamax_wrapper(const int n, const void* x, const int incx) { int izamax_res = 0; if (cblas_library() != NULL) { if (cblas_izamax_p == NULL) - cblas_izamax_p = (int (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_izamax_p = (int (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_izamax"); if (cblas_izamax_p != NULL) izamax_res = cblas_izamax_p(n, x, incx); diff --git a/tests/unit_tests/blas/include/test_common.hpp b/tests/unit_tests/blas/include/test_common.hpp index 25620f4fb..64df0bd76 100644 --- a/tests/unit_tests/blas/include/test_common.hpp +++ b/tests/unit_tests/blas/include/test_common.hpp @@ -79,14 +79,15 @@ constexpr T matrix_size(oneapi::math::transpose trans, T m, T n, T ldm) { return outer_dimension(trans, m, n) * ldm; } template -constexpr T matrix_size(oneapi::math::layout layout, oneapi::math::transpose trans, T m, T n, T ldm) { +constexpr T matrix_size(oneapi::math::layout layout, oneapi::math::transpose trans, T m, T n, + T ldm) { return (layout == oneapi::math::layout::col_major) ? outer_dimension(trans, m, n) * ldm - : inner_dimension(trans, m, n) * ldm; + : inner_dimension(trans, m, n) * ldm; } // SYCL buffer creation helper. 
template -sycl::buffer make_buffer(const vec &v) { +sycl::buffer make_buffer(const vec& v) { sycl::buffer buf(v.data(), sycl::range<1>(v.size())); return buf; } @@ -174,14 +175,14 @@ std::complex rand_scalar(int mag) { } template -void rand_vector(fp *v, int n, int inc) { +void rand_vector(fp* v, int n, int inc) { int abs_inc = std::abs(inc); for (int i = 0; i < n; i++) v[i * abs_inc] = rand_scalar(); } template -void rand_vector(vec &v, int n, int inc) { +void rand_vector(vec& v, int n, int inc) { using fp = typename vec::value_type; int abs_inc = std::abs(inc); @@ -209,7 +210,7 @@ oneapi::math::transpose rand_trans() { } template -void print_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld, char *name) { +void print_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld, char* name) { std::cout << "Matrix " << name << ":\n"; for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { @@ -223,15 +224,15 @@ void print_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld, c } template -void copy_vector(fp *src, int n, int inc, fp *dest) { +void copy_vector(fp* src, int n, int inc, fp* dest) { int abs_inc = std::abs(inc); for (int i = 0; i < n; i++) dest[i * abs_inc] = src[i * abs_inc]; } template -void copy_matrix(vec_src &src, oneapi::math::layout layout, oneapi::math::transpose trans, int m, - int n, int ld, vec_dest &dest) { +void copy_matrix(vec_src& src, oneapi::math::layout layout, oneapi::math::transpose trans, int m, + int n, int ld, vec_dest& dest) { using T_data = typename vec_dest::value_type; dest.resize(matrix_size(layout, trans, m, n, ld)); if (((trans == oneapi::math::transpose::nontrans) && @@ -250,8 +251,8 @@ void copy_matrix(vec_src &src, oneapi::math::layout layout, oneapi::math::transp } template -void copy_matrix(fp_src *src, oneapi::math::layout layout, oneapi::math::transpose trans, int m, - int n, int ld, fp_dst *dest) { +void copy_matrix(fp_src* src, oneapi::math::layout layout, 
oneapi::math::transpose trans, int m, + int n, int ld, fp_dst* dest) { if (((trans == oneapi::math::transpose::nontrans) && (layout == oneapi::math::layout::col_major)) || ((trans != oneapi::math::transpose::nontrans) && @@ -268,7 +269,7 @@ void copy_matrix(fp_src *src, oneapi::math::layout layout, oneapi::math::transpo } template -void rand_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld) { +void rand_matrix(vec& M, oneapi::math::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; M.resize(matrix_size(trans, m, n, ld)); @@ -286,7 +287,7 @@ void rand_matrix(vec &M, oneapi::math::transpose trans, int m, int n, int ld) { } template -void rand_matrix(vec &M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, +void rand_matrix(vec& M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; @@ -308,7 +309,7 @@ void rand_matrix(vec &M, oneapi::math::layout layout, oneapi::math::transpose tr } template -void rand_matrix(fp *M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, +void rand_matrix(fp* M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, int ld) { if (((trans == oneapi::math::transpose::nontrans) && (layout == oneapi::math::layout::col_major)) || @@ -326,7 +327,7 @@ void rand_matrix(fp *M, oneapi::math::layout layout, oneapi::math::transpose tra } template -void rand_trsm_matrix(vec &M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, +void rand_trsm_matrix(vec& M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; @@ -356,8 +357,8 @@ void rand_trsm_matrix(vec &M, oneapi::math::layout layout, oneapi::math::transpo } template -void rand_trsm_matrix(fp *M, oneapi::math::layout layout, oneapi::math::transpose trans, int m, int n, - int ld) { +void rand_trsm_matrix(fp* M, oneapi::math::layout layout, 
oneapi::math::transpose trans, int m, + int n, int ld) { if (((trans == oneapi::math::transpose::nontrans) && (layout == oneapi::math::layout::col_major)) || ((trans != oneapi::math::transpose::nontrans) && @@ -382,7 +383,7 @@ void rand_trsm_matrix(fp *M, oneapi::math::layout layout, oneapi::math::transpos } template -void rand_tpsv_matrix(vec &M, oneapi::math::layout layout, oneapi::math::uplo upper_lower, +void rand_tpsv_matrix(vec& M, oneapi::math::layout layout, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, int m) { using fp = typename vec::value_type; std::vector tmp; @@ -408,7 +409,7 @@ void rand_tpsv_matrix(vec &M, oneapi::math::layout layout, oneapi::math::uplo up } template -void rand_tbsv_matrix(vec &M, oneapi::math::layout layout, oneapi::math::uplo upper_lower, +void rand_tbsv_matrix(vec& M, oneapi::math::layout layout, oneapi::math::uplo upper_lower, oneapi::math::transpose trans, int m, int k, int ld) { using fp = typename vec::value_type; std::vector tmp; @@ -461,7 +462,7 @@ typename std::enable_if::value, bool>::type check_equal(fp } template -bool check_equal_ptr(sycl::queue queue, fp *x, fp x_ref, int error_mag) { +bool check_equal_ptr(sycl::queue queue, fp* x, fp x_ref, int error_mag) { fp x_host; queue.memcpy(&x_host, x, sizeof(fp)).wait(); return check_equal(x_host, x_ref, error_mag); @@ -485,7 +486,7 @@ bool check_equal_trsm(fp x, fp x_ref, int error_mag) { } template -bool check_equal(fp x, fp x_ref, int error_mag, std::ostream &out) { +bool check_equal(fp x, fp x_ref, int error_mag, std::ostream& out) { bool good = check_equal(x, x_ref, error_mag); if (!good) { @@ -495,15 +496,15 @@ bool check_equal(fp x, fp x_ref, int error_mag, std::ostream &out) { } template -bool check_equal_ptr(sycl::queue queue, fp *x, fp x_ref, int error_mag, std::ostream &out) { +bool check_equal_ptr(sycl::queue queue, fp* x, fp x_ref, int error_mag, std::ostream& out) { fp x_host; queue.memcpy(&x_host, x, sizeof(fp)).wait(); return 
check_equal(x_host, x_ref, error_mag, out); } template -bool check_equal_vector(const fp *v, const fp *v_ref, int n, int inc, int error_mag, - std::ostream &out) { +bool check_equal_vector(const fp* v, const fp* v_ref, int n, int inc, int error_mag, + std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -523,7 +524,7 @@ bool check_equal_vector(const fp *v, const fp *v_ref, int n, int inc, int error_ } template -bool check_equal_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, std::ostream &out) { +bool check_equal_vector(vec1& v, vec2& v_ref, int n, int inc, int error_mag, std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -543,8 +544,8 @@ bool check_equal_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, std } template -bool check_equal_trsv_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, - std::ostream &out) { +bool check_equal_trsv_vector(vec1& v, vec2& v_ref, int n, int inc, int error_mag, + std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -564,8 +565,8 @@ bool check_equal_trsv_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag } template -bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::math::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout, int m, int n, int ld, + int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -586,8 +587,8 @@ bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::math::layout layout, int m } template -bool check_equal_matrix(const fp *M, const fp *M_ref, oneapi::math::layout layout, int m, int n, - int ld, int error_mag, std::ostream &out) { +bool check_equal_matrix(const fp* M, const fp* M_ref, oneapi::math::layout layout, int m, int n, + int ld, int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) 
{ @@ -608,9 +609,9 @@ bool check_equal_matrix(const fp *M, const fp *M_ref, oneapi::math::layout layou } template -bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::math::layout layout, +bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int m, int n, int ld, int error_mag, - std::ostream &out) { + std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -634,8 +635,8 @@ bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::math::layout layout, } template -bool check_equal_trsm_matrix(acc1 &M, acc2 &M_ref, oneapi::math::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_equal_trsm_matrix(acc1& M, acc2& M_ref, oneapi::math::layout layout, int m, int n, + int ld, int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -677,8 +678,8 @@ typename std::enable_if::value, bool>::type check_almost_eq } template -bool check_almost_equal_matrix_int(Ta &M, Tb &M_ref, oneapi::math::layout layout, int m, int n, - int ld, int error_mag, std::ostream &out) { +bool check_almost_equal_matrix_int(Ta& M, Tb& M_ref, oneapi::math::layout layout, int m, int n, + int ld, int error_mag, std::ostream& out) { static_assert(is_matrix_type_integral() && is_matrix_type_integral()); bool good = true; int idx, count = 0; @@ -700,8 +701,8 @@ bool check_almost_equal_matrix_int(Ta &M, Tb &M_ref, oneapi::math::layout layout } template -bool check_almost_equal_matrix(Ta &M, Tb &M_ref, oneapi::math::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_almost_equal_matrix(Ta& M, Tb& M_ref, oneapi::math::layout layout, int m, int n, int ld, + int error_mag, std::ostream& out) { // Only call if returned dtype is integral if constexpr (is_matrix_type_integral() && is_matrix_type_integral()) return check_almost_equal_matrix_int(M, M_ref, layout, m, n, ld, error_mag, out); diff --git 
a/tests/unit_tests/blas/level1/asum.cpp b/tests/unit_tests/blas/level1/asum.cpp index 60263989b..a2caef8d9 100644 --- a/tests/unit_tests/blas/level1/asum.cpp +++ b/tests/unit_tests/blas/level1/asum.cpp @@ -83,7 +83,8 @@ int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::asum(main_queue, N, x_buffer, incx, result_buffer); + oneapi::math::blas::column_major::asum(main_queue, N, x_buffer, incx, + result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::asum(main_queue, N, x_buffer, incx, result_buffer); @@ -97,8 +98,8 @@ int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) { x_buffer, incx, result_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N, x_buffer, - incx, result_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N, + x_buffer, incx, result_buffer); break; default: break; } diff --git a/tests/unit_tests/blas/level1/asum_usm.cpp b/tests/unit_tests/blas/level1/asum_usm.cpp index 8d15c1407..64be04afc 100644 --- a/tests/unit_tests/blas/level1/asum_usm.cpp +++ b/tests/unit_tests/blas/level1/asum_usm.cpp @@ -97,11 +97,11 @@ int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::asum(main_queue, N, x.data(), incx, - result_p, dependencies); + result_p, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::asum(main_queue, N, x.data(), incx, result_p, - dependencies); + dependencies); break; default: break; } @@ -113,8 +113,8 @@ int test(device* dev, oneapi::math::layout layout, int64_t N, int64_t incx) { x.data(), incx, result_p, dependencies); break; case oneapi::math::layout::row_major: - 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N, x.data(), - incx, result_p, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::asum, N, + x.data(), incx, result_p, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/axpby.cpp b/tests/unit_tests/blas/level1/axpby.cpp index 6b417e913..3b063404b 100644 --- a/tests/unit_tests/blas/level1/axpby.cpp +++ b/tests/unit_tests/blas/level1/axpby.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) { // Prepare data. vector x, y, y_ref; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpby(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + ::axpby(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPBY. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); @@ -87,11 +87,11 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::axpby(main_queue, N, alpha, x_buffer, incx, beta, - y_buffer, incy); + y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::axpby(main_queue, N, alpha, x_buffer, incx, beta, - y_buffer, incy); + y_buffer, incy); break; default: break; } @@ -109,16 +109,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPBY:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } class AxpbyTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpbyTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level1/axpby_usm.cpp b/tests/unit_tests/blas/level1/axpby_usm.cpp index 152879329..3bd8b4a71 100644 --- a/tests/unit_tests/blas/level1/axpby_usm.cpp +++ b/tests/unit_tests/blas/level1/axpby_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern 
std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha, fp beta) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); @@ -79,8 +79,8 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpby(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + ::axpby(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPBY. 
@@ -89,11 +89,11 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::axpby(main_queue, N, alpha, x.data(), incx, - beta, y.data(), incy, dependencies); + beta, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::axpby(main_queue, N, alpha, x.data(), incx, - beta, y.data(), incy, dependencies); + beta, y.data(), incy, dependencies); break; default: break; } @@ -113,16 +113,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPBY:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } class AxpbyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpbyUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level1/axpy.cpp b/tests/unit_tests/blas/level1/axpy.cpp index dc38cb718..5c6557de6 100644 --- a/tests/unit_tests/blas/level1/axpy.cpp +++ b/tests/unit_tests/blas/level1/axpy.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) { // Prepare data. 
vector x, y, y_ref; @@ -58,18 +58,17 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpy(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y_ref.data(), - &incy_ref); + ::axpy(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPY. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); @@ -87,19 +86,19 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::axpy(main_queue, N, alpha, x_buffer, incx, - y_buffer, incy); + y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::axpy(main_queue, N, alpha, x_buffer, incx, y_buffer, - incy); + incy); break; default: break; } #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N, alpha, - x_buffer, incx, y_buffer, incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N, + alpha, x_buffer, incx, y_buffer, incy); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy, N, alpha, @@ -109,16 +108,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY:\n" << e.what() << std::endl; 
print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY:\n" << error.what() << std::endl; } @@ -130,7 +129,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp return (int)good; } -class AxpyTests : public ::testing::TestWithParam> { +class AxpyTests : public ::testing::TestWithParam> { }; TEST_P(AxpyTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/axpy_usm.cpp b/tests/unit_tests/blas/level1/axpy_usm.cpp index d24c500a6..737c44de3 100644 --- a/tests/unit_tests/blas/level1/axpy_usm.cpp +++ b/tests/unit_tests/blas/level1/axpy_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp alpha) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); @@ -79,8 +79,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpy(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y_ref.data(), - &incy_ref); + ::axpy(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPY. 
@@ -89,11 +88,11 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::axpy(main_queue, N, alpha, x.data(), incx, - y.data(), incy, dependencies); + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::axpy(main_queue, N, alpha, x.data(), incx, - y.data(), incy, dependencies); + y.data(), incy, dependencies); break; default: break; } @@ -101,8 +100,8 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N, alpha, - x.data(), incx, y.data(), incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::axpy, N, + alpha, x.data(), incx, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::axpy, N, alpha, @@ -113,16 +112,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY:\n" << error.what() << std::endl; } @@ -134,7 +133,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } class AxpyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git 
a/tests/unit_tests/blas/level1/copy.cpp b/tests/unit_tests/blas/level1/copy.cpp index 9c2a2d54e..c4ed07a59 100644 --- a/tests/unit_tests/blas/level1/copy.cpp +++ b/tests/unit_tests/blas/level1/copy.cpp @@ -86,7 +86,7 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::copy(main_queue, N, x_buffer, incx, y_buffer, - incy); + incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::copy(main_queue, N, x_buffer, incx, y_buffer, incy); @@ -100,8 +100,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { x_buffer, incx, y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy, N, x_buffer, - incx, y_buffer, incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy, N, + x_buffer, incx, y_buffer, incy); break; default: break; } diff --git a/tests/unit_tests/blas/level1/copy_usm.cpp b/tests/unit_tests/blas/level1/copy_usm.cpp index 1243b493f..aa2d59c2a 100644 --- a/tests/unit_tests/blas/level1/copy_usm.cpp +++ b/tests/unit_tests/blas/level1/copy_usm.cpp @@ -88,11 +88,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::copy(main_queue, N, x.data(), incx, - y.data(), incy, dependencies); + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::copy(main_queue, N, x.data(), incx, y.data(), - incy, dependencies); + incy, dependencies); break; default: break; } @@ -104,8 +104,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { x.data(), incx, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::row_major::copy, N, x.data(), - incx, y.data(), incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::copy, N, + x.data(), incx, y.data(), incy, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/dot.cpp b/tests/unit_tests/blas/level1/dot.cpp index b00e83a69..055a2b472 100644 --- a/tests/unit_tests/blas/level1/dot.cpp +++ b/tests/unit_tests/blas/level1/dot.cpp @@ -86,11 +86,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::dot(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + result_buffer); break; default: break; } @@ -129,7 +129,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { return (int)good; } -class DotTests : public ::testing::TestWithParam> {}; +class DotTests : public ::testing::TestWithParam> { +}; TEST_P(DotTests, RealSinglePrecision) { EXPECT_TRUEORSKIP( diff --git a/tests/unit_tests/blas/level1/dot_usm.cpp b/tests/unit_tests/blas/level1/dot_usm.cpp index 8d17272c1..de775c4bf 100644 --- a/tests/unit_tests/blas/level1/dot_usm.cpp +++ b/tests/unit_tests/blas/level1/dot_usm.cpp @@ -96,12 +96,12 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::dot(main_queue, N, x.data(), incx, y.data(), - incy, result_p, dependencies); + done = oneapi::math::blas::column_major::dot( + main_queue, N, x.data(), incx, y.data(), incy, result_p, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::dot(main_queue, N, x.data(), incx, y.data(), - incy, result_p, 
dependencies); + incy, result_p, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/dotc.cpp b/tests/unit_tests/blas/level1/dotc.cpp index 96ae89f73..d6f6cafc2 100644 --- a/tests/unit_tests/blas/level1/dotc.cpp +++ b/tests/unit_tests/blas/level1/dotc.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { // Prepare data. vector x, y; fp result = 0.0, result_reference = 0.0; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotc((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotc((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTC. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); @@ -87,12 +87,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::dotc(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + oneapi::math::blas::column_major::dotc(main_queue, N, x_buffer, incx, y_buffer, + incy, result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::dotc(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + result_buffer); break; default: break; } @@ -103,23 +103,23 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { x_buffer, incx, y_buffer, incy, result_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N, x_buffer, - incx, y_buffer, incy, result_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N, + x_buffer, incx, y_buffer, incy, result_buffer); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTC:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) 
{ return (int)good; } -class DotcTests : public ::testing::TestWithParam> { +class DotcTests : public ::testing::TestWithParam> { }; TEST_P(DotcTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/dotc_usm.cpp b/tests/unit_tests/blas/level1/dotc_usm.cpp index 3148bd3f2..14d65968f 100644 --- a/tests/unit_tests/blas/level1/dotc_usm.cpp +++ b/tests/unit_tests/blas/level1/dotc_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); @@ -78,12 +78,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotc((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotc((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTC. 
- auto result_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + auto result_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); try { #ifdef CALL_RT_API @@ -94,7 +94,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::dotc(main_queue, N, x.data(), incx, y.data(), - incy, result_p, dependencies); + incy, result_p, dependencies); break; default: break; } @@ -106,24 +106,24 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { x.data(), incx, y.data(), incy, result_p, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N, x.data(), - incx, y.data(), incy, result_p, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotc, N, + x.data(), incx, y.data(), incy, result_p, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTC:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { } class DotcUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DotcUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP( diff --git a/tests/unit_tests/blas/level1/dotu.cpp b/tests/unit_tests/blas/level1/dotu.cpp index ded168de0..fd70e4fa1 100644 --- a/tests/unit_tests/blas/level1/dotu.cpp +++ b/tests/unit_tests/blas/level1/dotu.cpp @@ -41,12 
+41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { // Prepare data. vector x, y; fp result = 0.0, result_reference = 0.0; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotu((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotu((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTU. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); @@ -87,12 +87,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::dotu(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + oneapi::math::blas::column_major::dotu(main_queue, N, x_buffer, incx, y_buffer, + incy, result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::dotu(main_queue, N, x_buffer, incx, y_buffer, incy, - result_buffer); + result_buffer); break; default: break; } @@ -103,23 +103,23 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { x_buffer, incx, y_buffer, incy, result_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::row_major::dotu, N, x_buffer, - incx, y_buffer, incy, result_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotu, N, + x_buffer, incx, y_buffer, incy, result_buffer); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTU:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { return (int)good; } -class DotuTests : public ::testing::TestWithParam> { +class DotuTests : public ::testing::TestWithParam> { }; TEST_P(DotuTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/dotu_usm.cpp b/tests/unit_tests/blas/level1/dotu_usm.cpp index 889e01705..e1a34c5d2 100644 --- a/tests/unit_tests/blas/level1/dotu_usm.cpp +++ b/tests/unit_tests/blas/level1/dotu_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); @@ -78,12 +78,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotu((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotu((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTU. - auto result_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + auto result_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); try { #ifdef CALL_RT_API @@ -94,7 +94,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::dotu(main_queue, N, x.data(), incx, y.data(), - incy, result_p, dependencies); + incy, result_p, dependencies); break; default: break; } @@ -106,24 +106,24 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { x.data(), incx, y.data(), incy, result_p, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotu, N, x.data(), - incx, y.data(), incy, result_p, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::dotu, N, + x.data(), incx, y.data(), incy, result_p, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); } 
- catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTU:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy) { } class DotuUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DotuUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP( diff --git a/tests/unit_tests/blas/level1/iamax.cpp b/tests/unit_tests/blas/level1/iamax.cpp index 19672e341..e56f58c42 100644 --- a/tests/unit_tests/blas/level1/iamax.cpp +++ b/tests/unit_tests/blas/level1/iamax.cpp @@ -84,7 +84,7 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::iamax(main_queue, N, x_buffer, incx, - result_buffer); + result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::iamax(main_queue, N, x_buffer, incx, result_buffer); @@ -126,8 +126,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { return (int)good; } -class IamaxTests : public ::testing::TestWithParam> { -}; +class IamaxTests + : public ::testing::TestWithParam> {}; TEST_P(IamaxTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2)); diff --git a/tests/unit_tests/blas/level1/iamax_usm.cpp b/tests/unit_tests/blas/level1/iamax_usm.cpp index 88029f248..a55127fa2 100644 --- a/tests/unit_tests/blas/level1/iamax_usm.cpp +++ b/tests/unit_tests/blas/level1/iamax_usm.cpp @@ -96,11 +96,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::iamax(main_queue, N, x.data(), incx, - result_p, 
dependencies); + result_p, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::iamax(main_queue, N, x.data(), incx, result_p, - dependencies); + dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/iamin.cpp b/tests/unit_tests/blas/level1/iamin.cpp index 197d7f3d1..d0673a2cd 100644 --- a/tests/unit_tests/blas/level1/iamin.cpp +++ b/tests/unit_tests/blas/level1/iamin.cpp @@ -84,7 +84,7 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::iamin(main_queue, N, x_buffer, incx, - result_buffer); + result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::iamin(main_queue, N, x_buffer, incx, result_buffer); @@ -126,8 +126,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { return (int)good; } -class IaminTests : public ::testing::TestWithParam> { -}; +class IaminTests + : public ::testing::TestWithParam> {}; TEST_P(IaminTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 1357, 2)); diff --git a/tests/unit_tests/blas/level1/iamin_usm.cpp b/tests/unit_tests/blas/level1/iamin_usm.cpp index 9baea05ef..beb535595 100644 --- a/tests/unit_tests/blas/level1/iamin_usm.cpp +++ b/tests/unit_tests/blas/level1/iamin_usm.cpp @@ -96,11 +96,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::iamin(main_queue, N, x.data(), incx, - result_p, dependencies); + result_p, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::iamin(main_queue, N, x.data(), incx, result_p, - dependencies); + dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/nrm2.cpp b/tests/unit_tests/blas/level1/nrm2.cpp index edc0edd93..30ddd63fd 
100644 --- a/tests/unit_tests/blas/level1/nrm2.cpp +++ b/tests/unit_tests/blas/level1/nrm2.cpp @@ -84,7 +84,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::nrm2(main_queue, N, x_buffer, incx, result_buffer); + oneapi::math::blas::column_major::nrm2(main_queue, N, x_buffer, incx, + result_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::nrm2(main_queue, N, x_buffer, incx, result_buffer); @@ -98,8 +99,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { x_buffer, incx, result_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N, x_buffer, - incx, result_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N, + x_buffer, incx, result_buffer); break; default: break; } diff --git a/tests/unit_tests/blas/level1/nrm2_usm.cpp b/tests/unit_tests/blas/level1/nrm2_usm.cpp index b22701401..87470eb92 100644 --- a/tests/unit_tests/blas/level1/nrm2_usm.cpp +++ b/tests/unit_tests/blas/level1/nrm2_usm.cpp @@ -97,11 +97,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::nrm2(main_queue, N, x.data(), incx, - result_p, dependencies); + result_p, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::nrm2(main_queue, N, x.data(), incx, result_p, - dependencies); + dependencies); break; default: break; } @@ -113,8 +113,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx) { x.data(), incx, result_p, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N, x.data(), - incx, result_p, dependencies); + 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::nrm2, N, + x.data(), incx, result_p, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level1/rot.cpp b/tests/unit_tests/blas/level1/rot.cpp index 30506b4bf..6508283b3 100644 --- a/tests/unit_tests/blas/level1/rot.cpp +++ b/tests/unit_tests/blas/level1/rot.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c, +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c, fp_scalar s) { // Prepare data. vector x, x_ref, y, y_ref; @@ -59,18 +59,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rot(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_scalar *)&c, (fp_scalar *)&s); + ::rot(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_scalar*)&c, (fp_scalar*)&s); // Call DPC++ ROT. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); @@ -88,11 +88,11 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy, - c, s); + c, s); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::rot(main_queue, N, x_buffer, incx, y_buffer, incy, c, - s); + s); break; default: break; } @@ -110,16 +110,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROT:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp return (int)good; } -class RotTests : public ::testing::TestWithParam> { +class RotTests : public ::testing::TestWithParam> { }; TEST_P(RotTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/rot_usm.cpp b/tests/unit_tests/blas/level1/rot_usm.cpp index 880228cc7..52bc8c2f5 100644 --- a/tests/unit_tests/blas/level1/rot_usm.cpp +++ b/tests/unit_tests/blas/level1/rot_usm.cpp @@ -41,20 +41,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; 
namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c, +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp_scalar c, fp_scalar s) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); @@ -80,8 +80,8 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rot(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_scalar *)&c, (fp_scalar *)&s); + ::rot(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_scalar*)&c, (fp_scalar*)&s); // Call DPC++ ROT. 
@@ -89,12 +89,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::rot(main_queue, N, x.data(), incx, y.data(), - incy, c, s, dependencies); + done = oneapi::math::blas::column_major::rot(main_queue, N, x.data(), incx, + y.data(), incy, c, s, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::rot(main_queue, N, x.data(), incx, y.data(), - incy, c, s, dependencies); + incy, c, s, dependencies); break; default: break; } @@ -114,16 +114,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROT:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } class RotUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotUsmTests, RealSinglePrecision) { float c(2.0); diff --git a/tests/unit_tests/blas/level1/rotg.cpp b/tests/unit_tests/blas/level1/rotg.cpp index 23b2b9969..4ccb8eb30 100644 --- a/tests/unit_tests/blas/level1/rotg.cpp +++ b/tests/unit_tests/blas/level1/rotg.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Prepare data. 
fp a, b, s, a_ref, b_ref, s_ref; fp_scalar c, c_ref; @@ -64,17 +64,17 @@ int test(device *dev, oneapi::math::layout layout) { // Call Reference ROTG. using fp_ref = typename ref_type_info::type; - ::rotg((fp_ref *)&a_ref, (fp_ref *)&b_ref, (fp_scalar *)&c_ref, (fp_ref *)&s_ref); + ::rotg((fp_ref*)&a_ref, (fp_ref*)&b_ref, (fp_scalar*)&c_ref, (fp_ref*)&s_ref); // Call DPC++ ROTG. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); @@ -94,19 +94,19 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::rotg(main_queue, a_buffer, b_buffer, c_buffer, - s_buffer); + s_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::rotg(main_queue, a_buffer, b_buffer, c_buffer, - s_buffer); + s_buffer); break; default: break; } #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotg, a_buffer, - b_buffer, c_buffer, s_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotg, + a_buffer, b_buffer, c_buffer, s_buffer); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotg, a_buffer, @@ -116,16 +116,16 @@ int test(device *dev, oneapi::math::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return 
test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTG:\n" << error.what() << std::endl; } @@ -144,7 +144,7 @@ int test(device *dev, oneapi::math::layout layout) { return (int)good; } -class RotgTests : public ::testing::TestWithParam> { +class RotgTests : public ::testing::TestWithParam> { }; TEST_P(RotgTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/rotg_usm.cpp b/tests/unit_tests/blas/level1/rotg_usm.cpp index a90023a8b..f198bb393 100644 --- a/tests/unit_tests/blas/level1/rotg_usm.cpp +++ b/tests/unit_tests/blas/level1/rotg_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); @@ -83,22 +83,22 @@ int test(device *dev, oneapi::math::layout layout) { // Call Reference ROTG. using fp_ref = typename ref_type_info::type; - ::rotg((fp_ref *)&a_ref, (fp_ref *)&b_ref, (fp_scalar *)&c_ref, (fp_ref *)&s_ref); + ::rotg((fp_ref*)&a_ref, (fp_ref*)&b_ref, (fp_scalar*)&c_ref, (fp_ref*)&s_ref); // Call DPC++ ROTG. 
fp *a_p, *b_p, *s_p; - fp_scalar *c_p; + fp_scalar* c_p; if constexpr (alloc_type == usm::alloc::shared) { - a_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); - b_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); - s_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); - c_p = (fp_scalar *)oneapi::math::malloc_shared(64, sizeof(fp_scalar), *dev, cxt); + a_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + b_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + s_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + c_p = (fp_scalar*)oneapi::math::malloc_shared(64, sizeof(fp_scalar), *dev, cxt); } else if constexpr (alloc_type == usm::alloc::device) { - a_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); - b_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); - s_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); - c_p = (fp_scalar *)oneapi::math::malloc_device(64, sizeof(fp_scalar), *dev, cxt); + a_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + b_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + s_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + c_p = (fp_scalar*)oneapi::math::malloc_device(64, sizeof(fp_scalar), *dev, cxt); } else { throw std::runtime_error("Bad alloc_type"); @@ -115,11 +115,11 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::rotg(main_queue, a_p, b_p, c_p, s_p, - dependencies); + dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::rotg(main_queue, a_p, b_p, c_p, s_p, - dependencies); + dependencies); break; default: break; } @@ -127,8 +127,8 @@ int test(device *dev, oneapi::math::layout layout) { #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::column_major::rotg, a_p, b_p, - c_p, s_p, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::rotg, a_p, + b_p, c_p, s_p, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotg, a_p, b_p, @@ -139,16 +139,16 @@ int test(device *dev, oneapi::math::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTG:\n" << error.what() << std::endl; } @@ -170,7 +170,7 @@ int test(device *dev, oneapi::math::layout layout) { } class RotgUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotgUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP((test(std::get<0>(GetParam()), std::get<1>(GetParam())))); diff --git a/tests/unit_tests/blas/level1/rotm.cpp b/tests/unit_tests/blas/level1/rotm.cpp index 81f9f137e..c9a38ff40 100644 --- a/tests/unit_tests/blas/level1/rotm.cpp +++ b/tests/unit_tests/blas/level1/rotm.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) { // Prepare data. 
vector x, x_ref, y, y_ref; vector param; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rotm(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_ref *)param.data()); + ::rotm(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_ref*)param.data()); // Call DPC++ ROTM. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); @@ -90,12 +90,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::rotm(main_queue, N, x_buffer, incx, y_buffer, incy, - param_buffer); + oneapi::math::blas::column_major::rotm(main_queue, N, x_buffer, incx, y_buffer, + incy, param_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::rotm(main_queue, N, x_buffer, incx, y_buffer, incy, - param_buffer); + param_buffer); break; default: break; } @@ -106,23 +106,23 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp x_buffer, incx, y_buffer, incy, param_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N, x_buffer, - incx, y_buffer, incy, param_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N, + x_buffer, incx, y_buffer, incy, param_buffer); break; default: break; } #endif } - catch (exception const &e) { + catch (exception 
const& e) { std::cout << "Caught synchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTM:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp return (int)good; } -class RotmTests : public ::testing::TestWithParam> { +class RotmTests : public ::testing::TestWithParam> { }; TEST_P(RotmTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/rotm_usm.cpp b/tests/unit_tests/blas/level1/rotm_usm.cpp index 1625215a9..0c33bb467 100644 --- a/tests/unit_tests/blas/level1/rotm_usm.cpp +++ b/tests/unit_tests/blas/level1/rotm_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, fp flag) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); @@ -81,8 +81,8 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rotm(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_ref *)param.data()); + ::rotm(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_ref*)param.data()); // Call DPC++ ROTM. @@ -95,7 +95,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::rotm(main_queue, N, x.data(), incx, y.data(), - incy, param.data(), dependencies); + incy, param.data(), dependencies); break; default: break; } @@ -107,24 +107,24 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp x.data(), incx, y.data(), incy, param.data(), dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N, x.data(), - incx, y.data(), incy, param.data(), dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotm, N, + x.data(), incx, y.data(), incy, param.data(), dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const 
std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTM:\n" << error.what() << std::endl; } @@ -138,7 +138,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fp } class RotmUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotmUsmTests, RealSinglePrecision) { float flag(-1.0); diff --git a/tests/unit_tests/blas/level1/rotmg.cpp b/tests/unit_tests/blas/level1/rotmg.cpp index efc75a4b5..2d18311b7 100644 --- a/tests/unit_tests/blas/level1/rotmg.cpp +++ b/tests/unit_tests/blas/level1/rotmg.cpp @@ -91,11 +91,11 @@ int test(device* dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer, - y1, param_buffer); + y1, param_buffer); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer, y1, - param_buffer); + oneapi::math::blas::row_major::rotmg(main_queue, d1_buffer, d2_buffer, x1_buffer, + y1, param_buffer); break; default: break; } @@ -187,8 +187,8 @@ int test(device* dev, oneapi::math::layout layout) { return (int)good; } -class RotmgTests : public ::testing::TestWithParam> { -}; +class RotmgTests + : public ::testing::TestWithParam> {}; TEST_P(RotmgTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/level1/rotmg_usm.cpp b/tests/unit_tests/blas/level1/rotmg_usm.cpp index e992ad560..dcb79b2dd 100644 --- a/tests/unit_tests/blas/level1/rotmg_usm.cpp +++ b/tests/unit_tests/blas/level1/rotmg_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout) { +int test(device* dev, oneapi::math::layout layout) { // Catch 
asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTMG:\n" << e.what() << std::endl; print_error_code(e); @@ -82,14 +82,14 @@ int test(device *dev, oneapi::math::layout layout) { fp *d1_p, *d2_p, *x1_p; if constexpr (alloc_type == usm::alloc::device) { - d1_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); - d2_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); - x1_p = (fp *)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + d1_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + d2_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); + x1_p = (fp*)oneapi::math::malloc_device(64, sizeof(fp), *dev, cxt); } else if constexpr (alloc_type == usm::alloc::shared) { - d1_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); - d2_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); - x1_p = (fp *)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + d1_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + d2_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); + x1_p = (fp*)oneapi::math::malloc_shared(64, sizeof(fp), *dev, cxt); } else { throw std::runtime_error("Bad alloc_type"); @@ -101,7 +101,7 @@ int test(device *dev, oneapi::math::layout layout) { // Call Reference ROTMG. - ::rotmg(&d1_ref, &d2_ref, &x1_ref, &y1, (fp *)param_ref.data()); + ::rotmg(&d1_ref, &d2_ref, &x1_ref, &y1, (fp*)param_ref.data()); // Call DPC++ ROTMG. 
@@ -110,11 +110,11 @@ int test(device *dev, oneapi::math::layout layout) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1, - param.data(), dependencies); + param.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::rotmg(main_queue, d1_p, d2_p, x1_p, y1, - param.data(), dependencies); + param.data(), dependencies); break; default: break; } @@ -126,24 +126,24 @@ int test(device *dev, oneapi::math::layout layout) { d2_p, x1_p, y1, param.data(), dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotmg, d1_p, d2_p, - x1_p, y1, param.data(), dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::rotmg, d1_p, + d2_p, x1_p, y1, param.data(), dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTMG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTMG:\n" << error.what() << std::endl; } @@ -213,7 +213,7 @@ int test(device *dev, oneapi::math::layout layout) { } class RotmgUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotmgUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/level1/scal.cpp b/tests/unit_tests/blas/level1/scal.cpp index c345a6f55..27b10d2a4 100644 --- a/tests/unit_tests/blas/level1/scal.cpp +++ b/tests/unit_tests/blas/level1/scal.cpp @@ -95,8 +95,8 @@ int test(device* dev, 
oneapi::math::layout layout, int N, int incx, fp_scalar al #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N, alpha, - x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N, + alpha, x_buffer, incx); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::scal, N, alpha, diff --git a/tests/unit_tests/blas/level1/scal_usm.cpp b/tests/unit_tests/blas/level1/scal_usm.cpp index 6341533be..0da8f8ac4 100644 --- a/tests/unit_tests/blas/level1/scal_usm.cpp +++ b/tests/unit_tests/blas/level1/scal_usm.cpp @@ -89,11 +89,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, fp_scalar al switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::scal(main_queue, N, alpha, x.data(), incx, - dependencies); + dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::scal(main_queue, N, alpha, x.data(), incx, - dependencies); + dependencies); break; default: break; } @@ -101,8 +101,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, fp_scalar al #else switch (layout) { case oneapi::math::layout::col_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N, alpha, - x.data(), incx, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::column_major::scal, N, + alpha, x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::scal, N, alpha, diff --git a/tests/unit_tests/blas/level1/sdsdot.cpp b/tests/unit_tests/blas/level1/sdsdot.cpp index 67505b5f3..d8ae45449 100644 --- a/tests/unit_tests/blas/level1/sdsdot.cpp +++ b/tests/unit_tests/blas/level1/sdsdot.cpp @@ -41,11 +41,11 @@ using namespace sycl; using std::vector; -extern std::vector devices; 
+extern std::vector devices; namespace { -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) { // Prepare data. vector x, y; float result = float(-1), result_ref = float(-1); @@ -56,18 +56,18 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl // Call Reference SDSDOT. const int N_ref = N, incx_ref = incx, incy_ref = incy; - result_ref = ::sdsdot(&N_ref, (float *)&alpha, (float *)x.data(), &incx_ref, (float *)y.data(), - &incy_ref); + result_ref = + ::sdsdot(&N_ref, (float*)&alpha, (float*)x.data(), &incx_ref, (float*)y.data(), &incy_ref); // Call DPC++ SDSDOT. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); @@ -86,11 +86,11 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::sdsdot(main_queue, N, alpha, x_buffer, incx, - y_buffer, incy, result_buffer); + y_buffer, incy, result_buffer); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::sdsdot(main_queue, N, alpha, x_buffer, incx, y_buffer, - incy, result_buffer); + oneapi::math::blas::row_major::sdsdot(main_queue, N, alpha, x_buffer, incx, + y_buffer, incy, result_buffer); break; default: break; } @@ -108,16 +108,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SDSDOT:\n" << e.what() << 
std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SDSDOT:\n" << error.what() << std::endl; } @@ -130,7 +130,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl } class SdsdotTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SdsdotTests, RealSinglePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); diff --git a/tests/unit_tests/blas/level1/sdsdot_usm.cpp b/tests/unit_tests/blas/level1/sdsdot_usm.cpp index 09d9e08a3..f7153d38c 100644 --- a/tests/unit_tests/blas/level1/sdsdot_usm.cpp +++ b/tests/unit_tests/blas/level1/sdsdot_usm.cpp @@ -41,18 +41,18 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { -int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) { +int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy, float alpha) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); @@ -76,12 +76,12 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl // Call Reference SDSDOT. 
const int N_ref = N, incx_ref = incx, incy_ref = incy; - result_ref = ::sdsdot(&N_ref, (float *)&alpha, (float *)x.data(), &incx_ref, (float *)y.data(), - &incy_ref); + result_ref = + ::sdsdot(&N_ref, (float*)&alpha, (float*)x.data(), &incx_ref, (float*)y.data(), &incy_ref); // Call DPC++ SDSDOT. - auto result_p = (float *)oneapi::math::malloc_shared(64, sizeof(float), *dev, cxt); + auto result_p = (float*)oneapi::math::malloc_shared(64, sizeof(float), *dev, cxt); try { #ifdef CALL_RT_API @@ -91,8 +91,8 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl main_queue, N, alpha, x.data(), incx, y.data(), incy, result_p, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::sdsdot(main_queue, N, alpha, x.data(), incx, - y.data(), incy, result_p, dependencies); + done = oneapi::math::blas::row_major::sdsdot( + main_queue, N, alpha, x.data(), incx, y.data(), incy, result_p, dependencies); break; default: break; } @@ -113,16 +113,16 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SDSDOT:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, int N, int incx, int incy, fl } class SdsdotUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SdsdotUsmTests, RealSinglePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); diff --git a/tests/unit_tests/blas/level1/swap.cpp b/tests/unit_tests/blas/level1/swap.cpp 
index 696943c35..2fa39ff38 100644 --- a/tests/unit_tests/blas/level1/swap.cpp +++ b/tests/unit_tests/blas/level1/swap.cpp @@ -86,7 +86,7 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::swap(main_queue, N, x_buffer, incx, y_buffer, - incy); + incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::swap(main_queue, N, x_buffer, incx, y_buffer, incy); @@ -100,8 +100,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { x_buffer, incx, y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N, x_buffer, - incx, y_buffer, incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N, + x_buffer, incx, y_buffer, incy); break; default: break; } diff --git a/tests/unit_tests/blas/level1/swap_usm.cpp b/tests/unit_tests/blas/level1/swap_usm.cpp index 3fd742c36..5d8b1e059 100644 --- a/tests/unit_tests/blas/level1/swap_usm.cpp +++ b/tests/unit_tests/blas/level1/swap_usm.cpp @@ -88,11 +88,11 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::swap(main_queue, N, x.data(), incx, - y.data(), incy, dependencies); + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::swap(main_queue, N, x.data(), incx, y.data(), - incy, dependencies); + incy, dependencies); break; default: break; } @@ -104,8 +104,8 @@ int test(device* dev, oneapi::math::layout layout, int N, int incx, int incy) { x.data(), incx, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N, x.data(), - incx, y.data(), incy, dependencies); + 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::swap, N, + x.data(), incx, y.data(), incy, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/gbmv.cpp b/tests/unit_tests/blas/level2/gbmv.cpp index f1e8fc3ec..096eaca44 100644 --- a/tests/unit_tests/blas/level2/gbmv.cpp +++ b/tests/unit_tests/blas/level2/gbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. int x_len = outer_dimension(transa, m, n); @@ -66,18 +66,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans using fp_ref = typename ref_type_info::type; ::gbmv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, &kl_ref, - &ku_ref, (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + &ku_ref, (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GBMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -96,13 +96,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku, alpha, - A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + A_buffer, lda, x_buffer, incx, beta, + y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha, - A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + A_buffer, lda, x_buffer, incx, beta, y_buffer, + incy); break; default: break; } @@ -122,16 +122,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GBMV:\n" << error.what() << std::endl; } @@ -142,21 +142,21 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans return (int)good; } -class GbmvTests : public ::testing::TestWithParam> { +class GbmvTests : public ::testing::TestWithParam> { }; TEST_P(GbmvTests, RealSinglePrecision) { float alpha(2.0); float beta(3.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, 
5, 7, alpha, beta, 2, 3, - 42)); + oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1, - 42)); + oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, + 1, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); @@ -185,8 +185,8 @@ TEST_P(GbmvTests, RealDoublePrecision) { oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3, - 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, + -3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1, 42)); @@ -237,14 +237,14 @@ TEST_P(GbmvTests, ComplexDoublePrecision) { oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, 2, 3, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, 1, 1, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, 1, 1, 42)); 
EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); diff --git a/tests/unit_tests/blas/level2/gbmv_usm.cpp b/tests/unit_tests/blas/level2/gbmv_usm.cpp index c017d0fae..ec121e723 100644 --- a/tests/unit_tests/blas/level2/gbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/gbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -86,8 +86,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans using fp_ref = typename ref_type_info::type; ::gbmv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, &kl_ref, - &ku_ref, (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + &ku_ref, (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GBMV. 
@@ -96,13 +96,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gbmv(main_queue, transa, m, n, kl, ku, - alpha, A.data(), lda, x.data(), incx, - beta, y.data(), incy, dependencies); + alpha, A.data(), lda, x.data(), incx, + beta, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::gbmv(main_queue, transa, m, n, kl, ku, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -124,16 +124,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GBMV:\n" << error.what() << std::endl; } @@ -145,20 +145,20 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans } class GbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GbmvUsmTests, RealSinglePrecision) { float alpha(2.0); float beta(3.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2, 3, - 42)); + oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, -2, -3, 42)); 
EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1, - 42)); + oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, + 1, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); @@ -187,8 +187,8 @@ TEST_P(GbmvUsmTests, RealDoublePrecision) { oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, -3, - 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, -2, + -3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, beta, 1, 1, 42)); @@ -239,14 +239,14 @@ TEST_P(GbmvUsmTests, ComplexDoublePrecision) { oneapi::math::transpose::nontrans, 25, 30, 5, 7, alpha, beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, 2, 3, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, 5, 7, alpha, - beta, 1, 1, 42)); + oneapi::math::transpose::trans, 25, 30, 5, 7, + alpha, beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans, 25, 30, 5, 7, alpha, beta, 2, 3, 42)); diff --git a/tests/unit_tests/blas/level2/gemv.cpp b/tests/unit_tests/blas/level2/gemv.cpp index d75720278..7d0999b1d 100644 --- 
a/tests/unit_tests/blas/level2/gemv.cpp +++ b/tests/unit_tests/blas/level2/gemv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. int x_len = outer_dimension(transa, m, n); @@ -65,18 +65,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans using fp_ref = typename ref_type_info::type; ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GEMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -95,11 +95,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::gemv(main_queue, transa, m, n, alpha, A_buffer, - lda, x_buffer, incx, beta, y_buffer, incy); + lda, x_buffer, incx, beta, y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gemv(main_queue, transa, m, n, alpha, A_buffer, lda, - x_buffer, incx, beta, y_buffer, incy); + x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -119,16 +119,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV:\n" << error.what() << std::endl; } @@ -139,19 +139,21 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans return (int)good; } -class GemvTests : public ::testing::TestWithParam> { +class GemvTests : public ::testing::TestWithParam> { }; TEST_P(GemvTests, RealSinglePrecision) { float alpha(2.0); float beta(3.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3, 42)); + 
oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42)); + oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), @@ -176,7 +178,8 @@ TEST_P(GemvTests, RealDoublePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42)); } @@ -185,23 +188,23 @@ TEST_P(GemvTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 2, 3, alpha, beta, - 2, 3, 42)); + oneapi::math::transpose::nontrans, 2, 3, alpha, + beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 2, 3, alpha, beta, - -2, -3, 42)); + oneapi::math::transpose::nontrans, 2, 3, alpha, + beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 2, 3, alpha, beta, - 1, 1, 42)); + oneapi::math::transpose::nontrans, 2, 3, alpha, + beta, 1, 1, 42)); 
EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 2, 3, alpha, beta, 2, - 3, 42)); + oneapi::math::transpose::trans, 2, 3, alpha, beta, + 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 2, 3, alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 2, 3, alpha, beta, 1, - 1, 42)); + oneapi::math::transpose::trans, 2, 3, alpha, beta, + 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans, 2, 3, alpha, beta, 2, 3, 42)); @@ -228,14 +231,14 @@ TEST_P(GemvTests, ComplexDoublePrecision) { oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - 2, 3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - 1, 1, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans, 25, 30, alpha, beta, 2, 3, 42)); diff --git a/tests/unit_tests/blas/level2/gemv_usm.cpp b/tests/unit_tests/blas/level2/gemv_usm.cpp index 5729914d2..c5f9d4dec 100644 --- a/tests/unit_tests/blas/level2/gemv_usm.cpp +++ b/tests/unit_tests/blas/level2/gemv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device 
*dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose transa, int m, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -85,8 +85,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans using fp_ref = typename ref_type_info::type; ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GEMV. 
@@ -95,13 +95,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gemv(main_queue, transa, m, n, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::gemv(main_queue, transa, m, n, alpha, A.data(), - lda, x.data(), incx, beta, y.data(), incy, - dependencies); + done = oneapi::math::blas::row_major::gemv(main_queue, transa, m, n, alpha, + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -123,16 +123,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV:\n" << error.what() << std::endl; } @@ -144,18 +144,20 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::transpose trans } class GemvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvUsmTests, RealSinglePrecision) { float alpha(2.0); float beta(3.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3, 42)); + oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 2, 3, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::nontrans, 25, 30, alpha, beta, -2, -3, 42)); 
EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42)); + oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), @@ -180,7 +182,8 @@ TEST_P(GemvUsmTests, RealDoublePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, beta, -2, -3, + 42)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::trans, 25, 30, alpha, beta, 1, 1, 42)); } @@ -230,14 +233,14 @@ TEST_P(GemvUsmTests, ComplexDoublePrecision) { oneapi::math::transpose::nontrans, 25, 30, alpha, beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - 2, 3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, 2, 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - -2, -3, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::transpose::trans, 25, 30, alpha, beta, - 1, 1, 42)); + oneapi::math::transpose::trans, 25, 30, alpha, + beta, 1, 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::transpose::conjtrans, 25, 30, alpha, beta, 2, 3, 42)); diff --git a/tests/unit_tests/blas/level2/ger.cpp b/tests/unit_tests/blas/level2/ger.cpp index 
46b92ff17..9c207a7c4 100644 --- a/tests/unit_tests/blas/level2/ger.cpp +++ b/tests/unit_tests/blas/level2/ger.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. @@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GER. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); @@ -92,11 +92,11 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::ger(main_queue, m, n, alpha, x_buffer, incx, - y_buffer, incy, A_buffer, lda); + y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::ger(main_queue, m, n, alpha, x_buffer, incx, y_buffer, - incy, A_buffer, lda); + oneapi::math::blas::row_major::ger(main_queue, m, n, alpha, x_buffer, incx, + y_buffer, incy, A_buffer, lda); break; default: break; } @@ -114,16 +114,16 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GER:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i return (int)good; } -class GerTests : public ::testing::TestWithParam> { +class GerTests : public ::testing::TestWithParam> { }; TEST_P(GerTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/ger_usm.cpp b/tests/unit_tests/blas/level2/ger_usm.cpp index 9d963e68a..c0a523785 100644 --- a/tests/unit_tests/blas/level2/ger_usm.cpp 
+++ b/tests/unit_tests/blas/level2/ger_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GER. 
@@ -91,14 +91,14 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::ger(main_queue, m, n, alpha, x.data(), incx, - y.data(), incy, A.data(), lda, - dependencies); + done = oneapi::math::blas::column_major::ger(main_queue, m, n, alpha, x.data(), + incx, y.data(), incy, A.data(), lda, + dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::ger(main_queue, m, n, alpha, x.data(), incx, - y.data(), incy, A.data(), lda, dependencies); + y.data(), incy, A.data(), lda, dependencies); break; default: break; } @@ -120,16 +120,16 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GER:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i } class GerUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GerUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/gerc.cpp b/tests/unit_tests/blas/level2/gerc.cpp index 316c496a4..8d89f8347 100644 --- a/tests/unit_tests/blas/level2/gerc.cpp +++ b/tests/unit_tests/blas/level2/gerc.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int 
incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. @@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERC. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); @@ -92,11 +92,11 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::gerc(main_queue, m, n, alpha, x_buffer, incx, - y_buffer, incy, A_buffer, lda); + y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gerc(main_queue, m, n, alpha, x_buffer, incx, - y_buffer, incy, A_buffer, lda); + y_buffer, incy, A_buffer, lda); break; default: break; } @@ -107,23 +107,23 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gerc, m, n, alpha, - x_buffer, incx, y_buffer, incy, A_buffer, lda); + 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gerc, m, n, + alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERC:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i return (int)good; } -class GercTests : public ::testing::TestWithParam> { +class GercTests : public ::testing::TestWithParam> { }; TEST_P(GercTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/gerc_usm.cpp b/tests/unit_tests/blas/level2/gerc_usm.cpp index cbca89a4e..a2d9d28cb 100644 --- a/tests/unit_tests/blas/level2/gerc_usm.cpp +++ b/tests/unit_tests/blas/level2/gerc_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERC. @@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gerc(main_queue, m, n, alpha, x.data(), - incx, y.data(), incy, A.data(), lda, - dependencies); + incx, y.data(), incy, A.data(), lda, + dependencies); break; case oneapi::math::layout::row_major: - done = - oneapi::math::blas::row_major::gerc(main_queue, m, n, alpha, x.data(), incx, - y.data(), incy, A.data(), lda, dependencies); + done = oneapi::math::blas::row_major::gerc(main_queue, m, n, alpha, x.data(), incx, + y.data(), incy, A.data(), lda, + dependencies); break; default: break; } @@ -111,8 +111,8 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::gerc, m, n, alpha, - x.data(), incx, y.data(), incy, A.data(), lda, + TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::row_major::gerc, m, n, + alpha, x.data(), incx, y.data(), incy, A.data(), lda, dependencies); break; default: break; @@ -120,16 +120,16 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERC:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i } class GercUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GercUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/geru.cpp b/tests/unit_tests/blas/level2/geru.cpp index 70435af74..7a44656b1 100644 --- a/tests/unit_tests/blas/level2/geru.cpp +++ b/tests/unit_tests/blas/level2/geru.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
@@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERU. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); @@ -92,11 +92,11 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::geru(main_queue, m, n, alpha, x_buffer, incx, - y_buffer, incy, A_buffer, lda); + y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::geru(main_queue, m, n, alpha, x_buffer, incx, - y_buffer, incy, A_buffer, lda); + y_buffer, incy, A_buffer, lda); break; default: break; } @@ -107,23 +107,23 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::geru, m, n, alpha, - x_buffer, incx, y_buffer, incy, A_buffer, lda); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::geru, m, n, + alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); 
break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERU:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i return (int)good; } -class GeruTests : public ::testing::TestWithParam> { +class GeruTests : public ::testing::TestWithParam> { }; TEST_P(GeruTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/geru_usm.cpp b/tests/unit_tests/blas/level2/geru_usm.cpp index b470ffbdc..6bf155795 100644 --- a/tests/unit_tests/blas/level2/geru_usm.cpp +++ b/tests/unit_tests/blas/level2/geru_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::math::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERU. @@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::geru(main_queue, m, n, alpha, x.data(), - incx, y.data(), incy, A.data(), lda, - dependencies); + incx, y.data(), incy, A.data(), lda, + dependencies); break; case oneapi::math::layout::row_major: - done = - oneapi::math::blas::row_major::geru(main_queue, m, n, alpha, x.data(), incx, - y.data(), incy, A.data(), lda, dependencies); + done = oneapi::math::blas::row_major::geru(main_queue, m, n, alpha, x.data(), incx, + y.data(), incy, A.data(), lda, + dependencies); break; default: break; } @@ -111,8 +111,8 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::geru, m, n, alpha, - x.data(), incx, y.data(), incy, A.data(), lda, + TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::row_major::geru, m, n, + alpha, x.data(), incx, y.data(), incy, A.data(), lda, dependencies); break; default: break; @@ -120,16 +120,16 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERU:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, int m, int n, fp alpha, int i } class GeruUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GeruUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hbmv.cpp b/tests/unit_tests/blas/level2/hbmv.cpp index 02c1e0f15..bd1fc71f2 100644 --- a/tests/unit_tests/blas/level2/hbmv.cpp +++ b/tests/unit_tests/blas/level2/hbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. 
vector x, y, y_ref, A; @@ -63,18 +63,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HBMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -93,12 +93,12 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha, - A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + A_buffer, lda, x_buffer, incx, beta, + y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha, A_buffer, - lda, x_buffer, incx, beta, y_buffer, incy); + lda, x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -110,24 +110,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe beta, y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv, upper_lower, - n, k, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv, + upper_lower, n, k, alpha, A_buffer, lda, 
x_buffer, incx, + beta, y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HBMV:\n" << error.what() << std::endl; } @@ -138,7 +138,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class HbmvTests : public ::testing::TestWithParam> { +class HbmvTests : public ::testing::TestWithParam> { }; TEST_P(HbmvTests, ComplexSinglePrecision) { @@ -169,11 +169,11 @@ TEST_P(HbmvTests, ComplexDoublePrecision) { std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, - 42)); + oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, - 42)); + oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42)); @@ -181,11 +181,11 @@ TEST_P(HbmvTests, ComplexDoublePrecision) { oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, - 42)); + oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, + 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, - 42)); + oneapi::math::uplo::upper, 30, 5, alpha, beta, 
1, + 1, 42)); } INSTANTIATE_TEST_SUITE_P(HbmvTestSuite, HbmvTests, diff --git a/tests/unit_tests/blas/level2/hbmv_usm.cpp b/tests/unit_tests/blas/level2/hbmv_usm.cpp index 6adbba106..325013029 100644 --- a/tests/unit_tests/blas/level2/hbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/hbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -84,8 +84,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HBMV. 
@@ -94,13 +94,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::hbmv(main_queue, upper_lower, n, k, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::hbmv(main_queue, upper_lower, n, k, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -113,25 +113,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe beta, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv, upper_lower, - n, k, alpha, A.data(), lda, x.data(), incx, beta, y.data(), - incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hbmv, + upper_lower, n, k, alpha, A.data(), lda, x.data(), incx, + beta, y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HBMV:\n" << error.what() << std::endl; } @@ -143,7 +143,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class HbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HbmvUsmTests, ComplexSinglePrecision) { std::complex 
alpha(2.0, -0.5); @@ -173,11 +173,11 @@ TEST_P(HbmvUsmTests, ComplexDoublePrecision) { std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, 3, - 42)); + oneapi::math::uplo::lower, 30, 5, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, 3, - 42)); + oneapi::math::uplo::upper, 30, 5, alpha, beta, 2, + 3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, 5, alpha, beta, -2, -3, 42)); @@ -185,11 +185,11 @@ TEST_P(HbmvUsmTests, ComplexDoublePrecision) { oneapi::math::uplo::upper, 30, 5, alpha, beta, -2, -3, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, 1, - 42)); + oneapi::math::uplo::lower, 30, 5, alpha, beta, 1, + 1, 42)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, 1, - 42)); + oneapi::math::uplo::upper, 30, 5, alpha, beta, 1, + 1, 42)); } INSTANTIATE_TEST_SUITE_P(HbmvUsmTestSuite, HbmvUsmTests, diff --git a/tests/unit_tests/blas/level2/hemv.cpp b/tests/unit_tests/blas/level2/hemv.cpp index c45ebd3a4..26d71956c 100644 --- a/tests/unit_tests/blas/level2/hemv.cpp +++ b/tests/unit_tests/blas/level2/hemv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. 
vector x, y, y_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hemv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HEMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -92,11 +92,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hemv(main_queue, upper_lower, n, alpha, A_buffer, - lda, x_buffer, incx, beta, y_buffer, incy); + lda, x_buffer, incx, beta, y_buffer, incy); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::hemv(main_queue, upper_lower, n, alpha, A_buffer, lda, - x_buffer, incx, beta, y_buffer, incy); + oneapi::math::blas::row_major::hemv(main_queue, upper_lower, n, alpha, A_buffer, + lda, x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -108,24 +108,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv, upper_lower, - n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv, 
+ upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta, + y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HEMV:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class HemvTests : public ::testing::TestWithParam> { +class HemvTests : public ::testing::TestWithParam> { }; TEST_P(HemvTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hemv_usm.cpp b/tests/unit_tests/blas/level2/hemv_usm.cpp index 07f0cc386..3ca3fd7cf 100644 --- a/tests/unit_tests/blas/level2/hemv_usm.cpp +++ b/tests/unit_tests/blas/level2/hemv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hemv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HEMV. @@ -93,13 +93,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::hemv(main_queue, upper_lower, n, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::hemv(main_queue, upper_lower, n, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -112,25 +112,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv, upper_lower, - n, alpha, A.data(), lda, x.data(), incx, beta, y.data(), - incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hemv, + upper_lower, n, 
alpha, A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HEMV:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class HemvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HemvUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/her.cpp b/tests/unit_tests/blas/level2/her.cpp index 9bc431053..eaeb82048 100644 --- a/tests/unit_tests/blas/level2/her.cpp +++ b/tests/unit_tests/blas/level2/her.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp_scalar alpha, int incx, int lda) { // Prepare data. vector x, A_ref, A; @@ -61,17 +61,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_scalar_ref = typename ref_type_info::type; ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); @@ -89,11 +89,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::her(main_queue, upper_lower, n, alpha, x_buffer, - incx, A_buffer, lda); + incx, A_buffer, lda); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::her(main_queue, upper_lower, n, alpha, x_buffer, incx, - A_buffer, lda); + oneapi::math::blas::row_major::her(main_queue, upper_lower, n, alpha, x_buffer, + incx, A_buffer, lda); break; default: break; } @@ -111,16 +111,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class HerTests : public ::testing::TestWithParam> { +class HerTests : public ::testing::TestWithParam> { }; TEST_P(HerTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/her2.cpp b/tests/unit_tests/blas/level2/her2.cpp index 9ddb0ee3f..6ab26b315 100644 --- a/tests/unit_tests/blas/level2/her2.cpp +++ 
b/tests/unit_tests/blas/level2/her2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. vector x, y, A_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::her2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); @@ -92,11 +92,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::her2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer, lda); + incx, y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::her2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer, lda); + incx, y_buffer, incy, A_buffer, lda); break; default: break; } @@ -108,23 +108,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe 
A_buffer, lda); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2, upper_lower, - n, alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2, + upper_lower, n, alpha, x_buffer, incx, y_buffer, incy, + A_buffer, lda); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER2:\n" << error.what() << std::endl; } @@ -135,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Her2Tests : public ::testing::TestWithParam> { +class Her2Tests : public ::testing::TestWithParam> { }; TEST_P(Her2Tests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/her2_usm.cpp b/tests/unit_tests/blas/level2/her2_usm.cpp index 93c1d1f04..1fb29c132 100644 --- a/tests/unit_tests/blas/level2/her2_usm.cpp +++ b/tests/unit_tests/blas/level2/her2_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::her2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER2. @@ -93,13 +93,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::her2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, - A.data(), lda, dependencies); + x.data(), incx, y.data(), incy, + A.data(), lda, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::her2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, A.data(), - lda, dependencies); + x.data(), incx, y.data(), incy, A.data(), + lda, dependencies); break; default: break; } @@ -112,25 +112,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A.data(), lda, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2, upper_lower, - n, alpha, x.data(), incx, y.data(), incy, A.data(), lda, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::her2, + upper_lower, n, alpha, x.data(), incx, y.data(), incy, + A.data(), lda, 
dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER2:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class Her2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Her2UsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/her_usm.cpp b/tests/unit_tests/blas/level2/her_usm.cpp index 0a7f86afe..fa614dd25 100644 --- a/tests/unit_tests/blas/level2/her_usm.cpp +++ b/tests/unit_tests/blas/level2/her_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp_scalar alpha, int incx, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); @@ -82,7 +82,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_scalar_ref = typename ref_type_info::type; ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER. @@ -116,16 +116,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class HerUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HerUsmTests, ComplexSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/hpmv.cpp b/tests/unit_tests/blas/level2/hpmv.cpp index 8894d1e0b..08b221536 100644 --- a/tests/unit_tests/blas/level2/hpmv.cpp +++ b/tests/unit_tests/blas/level2/hpmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector 
devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Prepare data. vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hpmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HPMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer, - x_buffer, incx, beta, y_buffer, incy); + x_buffer, incx, beta, y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::hpmv(main_queue, upper_lower, n, alpha, A_buffer, - x_buffer, incx, beta, y_buffer, incy); + x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -107,23 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv, 
upper_lower, - n, alpha, A_buffer, x_buffer, incx, beta, y_buffer, incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv, + upper_lower, n, alpha, A_buffer, x_buffer, incx, beta, + y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPMV:\n" << error.what() << std::endl; } @@ -134,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class HpmvTests : public ::testing::TestWithParam> { +class HpmvTests : public ::testing::TestWithParam> { }; TEST_P(HpmvTests, ComplexSinglePrecision) { @@ -145,9 +146,11 @@ TEST_P(HpmvTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, beta, 2, 3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, alpha, beta, -2, -3)); + oneapi::math::uplo::lower, 30, alpha, beta, -2, + -3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, alpha, beta, -2, -3)); + oneapi::math::uplo::upper, 30, alpha, beta, -2, + -3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, beta, 1, 1)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/level2/hpmv_usm.cpp b/tests/unit_tests/blas/level2/hpmv_usm.cpp index d2302f573..b81adfccb 100644 --- a/tests/unit_tests/blas/level2/hpmv_usm.cpp +++ 
b/tests/unit_tests/blas/level2/hpmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hpmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HPMV. 
@@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::hpmv(main_queue, upper_lower, n, alpha, - A.data(), x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::hpmv(main_queue, upper_lower, n, alpha, - A.data(), x.data(), incx, beta, y.data(), - incy, dependencies); + A.data(), x.data(), incx, beta, y.data(), + incy, dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv, upper_lower, - n, alpha, A.data(), x.data(), incx, beta, y.data(), incy, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpmv, + upper_lower, n, alpha, A.data(), x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class HpmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HpmvUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); @@ -151,9 +151,11 @@ 
TEST_P(HpmvUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, beta, 2, 3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, 30, alpha, beta, -2, -3)); + oneapi::math::uplo::lower, 30, alpha, beta, -2, + -3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, 30, alpha, beta, -2, -3)); + oneapi::math::uplo::upper, 30, alpha, beta, -2, + -3)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, beta, 1, 1)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/level2/hpr.cpp b/tests/unit_tests/blas/level2/hpr.cpp index 3ea336bab..492c07457 100644 --- a/tests/unit_tests/blas/level2/hpr.cpp +++ b/tests/unit_tests/blas/level2/hpr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp_scalar alpha, int incx) { // Prepare data. vector x, A_ref, A; @@ -61,17 +61,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_scalar_ref = typename ref_type_info::type; ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ HPR. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); @@ -89,11 +89,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hpr(main_queue, upper_lower, n, alpha, x_buffer, - incx, A_buffer); + incx, A_buffer); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::hpr(main_queue, upper_lower, n, alpha, x_buffer, incx, - A_buffer); + oneapi::math::blas::row_major::hpr(main_queue, upper_lower, n, alpha, x_buffer, + incx, A_buffer); break; default: break; } @@ -111,16 +111,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR:\n" << error.what() << std::endl; } @@ -131,45 +131,53 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class HprTests : public ::testing::TestWithParam> { +class HprTests : public ::testing::TestWithParam> { }; TEST_P(HprTests, ComplexSinglePrecision) { float alpha(2.0); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 2))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), 
std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 2))); EXPECT_TRUEORSKIP( (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, -2))); EXPECT_TRUEORSKIP( (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, -2))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 1))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 1))); } TEST_P(HprTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); double alpha(2.0); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 2))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 2))); EXPECT_TRUEORSKIP( (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, -2))); EXPECT_TRUEORSKIP( (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::uplo::upper, 30, alpha, -2))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 1))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 1))); } INSTANTIATE_TEST_SUITE_P(HprTestSuite, HprTests, diff --git a/tests/unit_tests/blas/level2/hpr2.cpp b/tests/unit_tests/blas/level2/hpr2.cpp index 5ab9b7d28..d8780b576 100644 --- a/tests/unit_tests/blas/level2/hpr2.cpp +++ b/tests/unit_tests/blas/level2/hpr2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Prepare data. vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hpr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ HPR2. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer); + incx, y_buffer, incy, A_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::hpr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer); + incx, y_buffer, incy, A_buffer); break; default: break; } @@ -107,23 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2, upper_lower, - n, alpha, x_buffer, incx, y_buffer, incy, A_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2, + upper_lower, n, alpha, x_buffer, incx, y_buffer, incy, + A_buffer); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR2:\n" << error.what() << std::endl; } @@ -134,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Hpr2Tests : public 
::testing::TestWithParam> { +class Hpr2Tests : public ::testing::TestWithParam> { }; TEST_P(Hpr2Tests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hpr2_usm.cpp b/tests/unit_tests/blas/level2/hpr2_usm.cpp index 4ac809338..6a9d4c55e 100644 --- a/tests/unit_tests/blas/level2/hpr2_usm.cpp +++ b/tests/unit_tests/blas/level2/hpr2_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::hpr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ HPR2. 
@@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::hpr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, - A.data(), dependencies); + x.data(), incx, y.data(), incy, + A.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::hpr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, A.data(), - dependencies); + x.data(), incx, y.data(), incy, A.data(), + dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A.data(), dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2, upper_lower, - n, alpha, x.data(), incx, y.data(), incy, A.data(), - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::hpr2, + upper_lower, n, alpha, x.data(), incx, y.data(), incy, + A.data(), dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class Hpr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Hpr2UsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hpr_usm.cpp 
b/tests/unit_tests/blas/level2/hpr_usm.cpp index 003d41f0b..a0e60c9d2 100644 --- a/tests/unit_tests/blas/level2/hpr_usm.cpp +++ b/tests/unit_tests/blas/level2/hpr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp_scalar alpha, int incx) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); @@ -82,7 +82,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_scalar_ref = typename ref_type_info::type; ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_scalar_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ HPR. 
@@ -90,12 +90,12 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::hpr(main_queue, upper_lower, n, alpha, - x.data(), incx, A.data(), dependencies); + done = oneapi::math::blas::column_major::hpr( + main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::hpr(main_queue, upper_lower, n, alpha, - x.data(), incx, A.data(), dependencies); + x.data(), incx, A.data(), dependencies); break; default: break; } @@ -116,16 +116,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR:\n" << error.what() << std::endl; } @@ -137,44 +137,52 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class HprUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HprUsmTests, ComplexSinglePrecision) { float alpha(2.0); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 2))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), 
std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 2))); EXPECT_TRUEORSKIP( (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, -2))); EXPECT_TRUEORSKIP( (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, -2))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 1))); - EXPECT_TRUEORSKIP((test, float>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, float>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 1))); } TEST_P(HprUsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); double alpha(2.0); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 2))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 2))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 2))); EXPECT_TRUEORSKIP( (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, -2))); EXPECT_TRUEORSKIP( (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::upper, 30, alpha, -2))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::uplo::lower, 30, alpha, 1))); - EXPECT_TRUEORSKIP((test, double>( - std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::uplo::upper, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::lower, 30, alpha, 1))); + EXPECT_TRUEORSKIP( + (test, double>(std::get<0>(GetParam()), std::get<1>(GetParam()), + oneapi::math::uplo::upper, 30, alpha, 1))); } INSTANTIATE_TEST_SUITE_P(HprUsmTestSuite, HprUsmTests, diff --git a/tests/unit_tests/blas/level2/sbmv.cpp b/tests/unit_tests/blas/level2/sbmv.cpp index 9291e14de..64f3a71bd 100644 --- a/tests/unit_tests/blas/level2/sbmv.cpp +++ b/tests/unit_tests/blas/level2/sbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. vector x, y, y_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::sbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SBMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -91,12 +91,12 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha, - A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + A_buffer, lda, x_buffer, incx, beta, + y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha, A_buffer, - lda, x_buffer, incx, beta, y_buffer, incy); + lda, x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -108,24 +108,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe beta, y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv, upper_lower, - n, k, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv, + upper_lower, n, k, alpha, A_buffer, lda, x_buffer, incx, + beta, y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SBMV:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device 
*dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class SbmvTests : public ::testing::TestWithParam> { +class SbmvTests : public ::testing::TestWithParam> { }; TEST_P(SbmvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/sbmv_usm.cpp b/tests/unit_tests/blas/level2/sbmv_usm.cpp index ad0a8b9b0..fd31e5285 100644 --- a/tests/unit_tests/blas/level2/sbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/sbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::sbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SBMV. 
@@ -93,13 +93,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::sbmv(main_queue, upper_lower, n, k, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::sbmv(main_queue, upper_lower, n, k, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -112,25 +112,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe beta, y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv, upper_lower, - n, k, alpha, A.data(), lda, x.data(), incx, beta, y.data(), - incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::sbmv, + upper_lower, n, k, alpha, A.data(), lda, x.data(), incx, + beta, y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SBMV:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class SbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SbmvUsmTests, RealSinglePrecision) { float 
alpha(2.0); diff --git a/tests/unit_tests/blas/level2/spmv.cpp b/tests/unit_tests/blas/level2/spmv.cpp index 0fb16c5c5..9935a61da 100644 --- a/tests/unit_tests/blas/level2/spmv.cpp +++ b/tests/unit_tests/blas/level2/spmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Prepare data. vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SPMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::spmv(main_queue, upper_lower, n, alpha, A_buffer, - x_buffer, incx, beta, y_buffer, incy); + x_buffer, incx, beta, y_buffer, incy); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::spmv(main_queue, upper_lower, n, alpha, A_buffer, - x_buffer, incx, beta, y_buffer, incy); + x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -107,23 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv, upper_lower, - n, alpha, A_buffer, x_buffer, incx, beta, y_buffer, incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv, + upper_lower, n, alpha, A_buffer, x_buffer, incx, beta, + y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPMV:\n" << error.what() << std::endl; } @@ -134,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe 
return (int)good; } -class SpmvTests : public ::testing::TestWithParam> { +class SpmvTests : public ::testing::TestWithParam> { }; TEST_P(SpmvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/spmv_usm.cpp b/tests/unit_tests/blas/level2/spmv_usm.cpp index 7998fc02b..703c25232 100644 --- a/tests/unit_tests/blas/level2/spmv_usm.cpp +++ b/tests/unit_tests/blas/level2/spmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SPMV. 
@@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::spmv(main_queue, upper_lower, n, alpha, - A.data(), x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::spmv(main_queue, upper_lower, n, alpha, - A.data(), x.data(), incx, beta, y.data(), - incy, dependencies); + A.data(), x.data(), incx, beta, y.data(), + incy, dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv, upper_lower, - n, alpha, A.data(), x.data(), incx, beta, y.data(), incy, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spmv, + upper_lower, n, alpha, A.data(), x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class SpmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SpmvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git 
a/tests/unit_tests/blas/level2/spr.cpp b/tests/unit_tests/blas/level2/spr.cpp index 7f6706cc3..456335f24 100644 --- a/tests/unit_tests/blas/level2/spr.cpp +++ b/tests/unit_tests/blas/level2/spr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx) { // Prepare data. vector x, A_ref, A; @@ -60,17 +60,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ SPR. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); @@ -88,11 +88,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::spr(main_queue, upper_lower, n, alpha, x_buffer, - incx, A_buffer); + incx, A_buffer); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::spr(main_queue, upper_lower, n, alpha, x_buffer, incx, - A_buffer); + oneapi::math::blas::row_major::spr(main_queue, upper_lower, n, alpha, x_buffer, + incx, A_buffer); break; default: break; } @@ -110,16 +110,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR:\n" << error.what() << std::endl; } @@ -130,7 +130,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class SprTests : public ::testing::TestWithParam> { +class SprTests : public ::testing::TestWithParam> { }; TEST_P(SprTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/spr2.cpp b/tests/unit_tests/blas/level2/spr2.cpp index 6cec654c0..78cfce411 100644 --- a/tests/unit_tests/blas/level2/spr2.cpp +++ b/tests/unit_tests/blas/level2/spr2.cpp @@ -42,12 
+42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Prepare data. vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ SPR2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::spr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer); + incx, y_buffer, incy, A_buffer); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::spr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer); + incx, y_buffer, incy, A_buffer); break; default: break; } @@ -107,23 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A_buffer); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::math::blas::row_major::spr2, upper_lower, - n, alpha, x_buffer, incx, y_buffer, incy, A_buffer); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr2, + upper_lower, n, alpha, x_buffer, incx, y_buffer, incy, + A_buffer); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR2:\n" << error.what() << std::endl; } @@ -134,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Spr2Tests : public ::testing::TestWithParam> { +class Spr2Tests : public ::testing::TestWithParam> { }; TEST_P(Spr2Tests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/spr2_usm.cpp b/tests/unit_tests/blas/level2/spr2_usm.cpp index 27e4ce07e..ec283b22e 100644 --- a/tests/unit_tests/blas/level2/spr2_usm.cpp +++ b/tests/unit_tests/blas/level2/spr2_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ SPR2. @@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::spr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, - A.data(), dependencies); + x.data(), incx, y.data(), incy, + A.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::spr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, A.data(), - dependencies); + x.data(), incx, y.data(), incy, A.data(), + dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A.data(), dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr2, upper_lower, - n, alpha, x.data(), incx, y.data(), incy, A.data(), - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::spr2, + upper_lower, n, alpha, x.data(), incx, y.data(), incy, + A.data(), dependencies); break; default: break; } main_queue.wait(); 
#endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class Spr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Spr2UsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/spr_usm.cpp b/tests/unit_tests/blas/level2/spr_usm.cpp index 20e74159b..e70cbfd74 100644 --- a/tests/unit_tests/blas/level2/spr_usm.cpp +++ b/tests/unit_tests/blas/level2/spr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); @@ -81,7 +81,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::spr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ SPR. @@ -89,12 +89,12 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::spr(main_queue, upper_lower, n, alpha, - x.data(), incx, A.data(), dependencies); + done = oneapi::math::blas::column_major::spr( + main_queue, upper_lower, n, alpha, x.data(), incx, A.data(), dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::spr(main_queue, upper_lower, n, alpha, - x.data(), incx, A.data(), dependencies); + x.data(), incx, A.data(), dependencies); break; default: break; } @@ -115,16 +115,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR:\n" << 
error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class SprUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SprUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/symv.cpp b/tests/unit_tests/blas/level2/symv.cpp index 4e62799e8..6e136c771 100644 --- a/tests/unit_tests/blas/level2/symv.cpp +++ b/tests/unit_tests/blas/level2/symv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::symv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SYMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::symv(main_queue, upper_lower, n, alpha, A_buffer, - lda, x_buffer, incx, beta, y_buffer, incy); + lda, x_buffer, incx, beta, y_buffer, incy); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::symv(main_queue, upper_lower, n, alpha, A_buffer, lda, - x_buffer, incx, beta, y_buffer, incy); + oneapi::math::blas::row_major::symv(main_queue, upper_lower, n, alpha, A_buffer, + lda, x_buffer, incx, beta, y_buffer, incy); break; default: break; } @@ -107,24 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y_buffer, incy); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv, upper_lower, - n, alpha, A_buffer, lda, x_buffer, incx, beta, y_buffer, - incy); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv, + upper_lower, n, alpha, A_buffer, lda, x_buffer, incx, beta, + y_buffer, incy); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYMV:\n" << error.what() << 
std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class SymvTests : public ::testing::TestWithParam> { +class SymvTests : public ::testing::TestWithParam> { }; TEST_P(SymvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/symv_usm.cpp b/tests/unit_tests/blas/level2/symv_usm.cpp index 30b82190c..def858041 100644 --- a/tests/unit_tests/blas/level2/symv_usm.cpp +++ b/tests/unit_tests/blas/level2/symv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::symv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SYMV. 
@@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::symv(main_queue, upper_lower, n, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::symv(main_queue, upper_lower, n, alpha, - A.data(), lda, x.data(), incx, beta, - y.data(), incy, dependencies); + A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe y.data(), incy, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv, upper_lower, - n, alpha, A.data(), lda, x.data(), incx, beta, y.data(), - incy, dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::symv, + upper_lower, n, alpha, A.data(), lda, x.data(), incx, beta, + y.data(), incy, dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class SymvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SymvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git 
a/tests/unit_tests/blas/level2/syr.cpp b/tests/unit_tests/blas/level2/syr.cpp index 1ce3ecf81..11679eabc 100644 --- a/tests/unit_tests/blas/level2/syr.cpp +++ b/tests/unit_tests/blas/level2/syr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int lda) { // Prepare data. vector x, A_ref, A; @@ -60,17 +60,17 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::syr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); @@ -88,11 +88,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::syr(main_queue, upper_lower, n, alpha, x_buffer, - incx, A_buffer, lda); + incx, A_buffer, lda); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::syr(main_queue, upper_lower, n, alpha, x_buffer, incx, - A_buffer, lda); + oneapi::math::blas::row_major::syr(main_queue, upper_lower, n, alpha, x_buffer, + incx, A_buffer, lda); break; default: break; } @@ -110,16 +110,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR:\n" << error.what() << std::endl; } @@ -130,7 +130,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class SyrTests : public ::testing::TestWithParam> { +class SyrTests : public ::testing::TestWithParam> { }; TEST_P(SyrTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/syr2.cpp b/tests/unit_tests/blas/level2/syr2.cpp index 3c3b3ceac..622d5eeac 100644 --- a/tests/unit_tests/blas/level2/syr2.cpp +++ 
b/tests/unit_tests/blas/level2/syr2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::syr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); @@ -91,11 +91,11 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::syr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer, lda); + incx, y_buffer, incy, A_buffer, lda); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::syr2(main_queue, upper_lower, n, alpha, x_buffer, - incx, y_buffer, incy, A_buffer, lda); + incx, y_buffer, incy, A_buffer, lda); break; default: break; } @@ -107,23 +107,24 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe 
A_buffer, lda); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2, upper_lower, - n, alpha, x_buffer, incx, y_buffer, incy, A_buffer, lda); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2, + upper_lower, n, alpha, x_buffer, incx, y_buffer, incy, + A_buffer, lda); break; default: break; } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR2:\n" << error.what() << std::endl; } @@ -134,7 +135,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Syr2Tests : public ::testing::TestWithParam> { +class Syr2Tests : public ::testing::TestWithParam> { }; TEST_P(Syr2Tests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/syr2_usm.cpp b/tests/unit_tests/blas/level2/syr2_usm.cpp index 94d3ce524..c56fc8647 100644 --- a/tests/unit_tests/blas/level2/syr2_usm.cpp +++ b/tests/unit_tests/blas/level2/syr2_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::syr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR2. @@ -92,13 +92,13 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::syr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, - A.data(), lda, dependencies); + x.data(), incx, y.data(), incy, + A.data(), lda, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::syr2(main_queue, upper_lower, n, alpha, - x.data(), incx, y.data(), incy, A.data(), - lda, dependencies); + x.data(), incx, y.data(), incy, A.data(), + lda, dependencies); break; default: break; } @@ -111,25 +111,25 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe A.data(), lda, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2, upper_lower, - n, alpha, x.data(), incx, y.data(), incy, A.data(), lda, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syr2, + upper_lower, n, alpha, x.data(), incx, y.data(), incy, + A.data(), lda, 
dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class Syr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Syr2UsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/syr_usm.cpp b/tests/unit_tests/blas/level2/syr_usm.cpp index 9a693ae1e..96992b8fa 100644 --- a/tests/unit_tests/blas/level2/syr_usm.cpp +++ b/tests/unit_tests/blas/level2/syr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, int n, fp alpha, int incx, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); @@ -81,7 +81,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe using fp_ref = typename ref_type_info::type; ::syr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR. @@ -115,16 +115,16 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe } class SyrUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/tbmv.cpp b/tests/unit_tests/blas/level2/tbmv.cpp index 0fb975581..f2e601b42 100644 --- a/tests/unit_tests/blas/level2/tbmv.cpp +++ b/tests/unit_tests/blas/level2/tbmv.cpp @@ -90,12 +90,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { 
case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::tbmv(main_queue, upper_lower, transa, unit_nonunit, - n, k, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::column_major::tbmv(main_queue, upper_lower, transa, + unit_nonunit, n, k, A_buffer, lda, x_buffer, + incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::tbmv(main_queue, upper_lower, transa, unit_nonunit, n, - k, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::row_major::tbmv(main_queue, upper_lower, transa, unit_nonunit, + n, k, A_buffer, lda, x_buffer, incx); break; default: break; } @@ -107,8 +108,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x_buffer, incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv, upper_lower, - transa, unit_nonunit, n, k, A_buffer, lda, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv, + upper_lower, transa, unit_nonunit, n, k, A_buffer, lda, + x_buffer, incx); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tbmv_usm.cpp b/tests/unit_tests/blas/level2/tbmv_usm.cpp index 56efc0742..f64cef35b 100644 --- a/tests/unit_tests/blas/level2/tbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/tbmv_usm.cpp @@ -93,13 +93,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::tbmv(main_queue, upper_lower, transa, - unit_nonunit, n, k, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::tbmv(main_queue, upper_lower, transa, - unit_nonunit, n, k, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } @@ 
-112,9 +112,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv, upper_lower, - transa, unit_nonunit, n, k, A.data(), lda, x.data(), incx, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbmv, + upper_lower, transa, unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tbsv.cpp b/tests/unit_tests/blas/level2/tbsv.cpp index 7127a98d1..5747091d5 100644 --- a/tests/unit_tests/blas/level2/tbsv.cpp +++ b/tests/unit_tests/blas/level2/tbsv.cpp @@ -90,12 +90,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::tbsv(main_queue, upper_lower, transa, unit_nonunit, - n, k, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::column_major::tbsv(main_queue, upper_lower, transa, + unit_nonunit, n, k, A_buffer, lda, x_buffer, + incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::tbsv(main_queue, upper_lower, transa, unit_nonunit, n, - k, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::row_major::tbsv(main_queue, upper_lower, transa, unit_nonunit, + n, k, A_buffer, lda, x_buffer, incx); break; default: break; } @@ -107,8 +108,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x_buffer, incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv, upper_lower, - transa, unit_nonunit, n, k, A_buffer, lda, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv, + upper_lower, transa, unit_nonunit, n, k, A_buffer, lda, + x_buffer, incx); break; default: break; } diff --git 
a/tests/unit_tests/blas/level2/tbsv_usm.cpp b/tests/unit_tests/blas/level2/tbsv_usm.cpp index 769f28669..20b8a947e 100644 --- a/tests/unit_tests/blas/level2/tbsv_usm.cpp +++ b/tests/unit_tests/blas/level2/tbsv_usm.cpp @@ -93,13 +93,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::tbsv(main_queue, upper_lower, transa, - unit_nonunit, n, k, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::tbsv(main_queue, upper_lower, transa, - unit_nonunit, n, k, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } @@ -112,9 +112,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv, upper_lower, - transa, unit_nonunit, n, k, A.data(), lda, x.data(), incx, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tbsv, + upper_lower, transa, unit_nonunit, n, k, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tpmv.cpp b/tests/unit_tests/blas/level2/tpmv.cpp index 558fd901a..b52a50656 100644 --- a/tests/unit_tests/blas/level2/tpmv.cpp +++ b/tests/unit_tests/blas/level2/tpmv.cpp @@ -88,12 +88,12 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::tpmv(main_queue, upper_lower, transa, unit_nonunit, - n, A_buffer, x_buffer, incx); + oneapi::math::blas::column_major::tpmv(main_queue, upper_lower, transa, + 
unit_nonunit, n, A_buffer, x_buffer, incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::tpmv(main_queue, upper_lower, transa, unit_nonunit, n, - A_buffer, x_buffer, incx); + oneapi::math::blas::row_major::tpmv(main_queue, upper_lower, transa, unit_nonunit, + n, A_buffer, x_buffer, incx); break; default: break; } @@ -105,8 +105,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv, upper_lower, - transa, unit_nonunit, n, A_buffer, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv, + upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer, + incx); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tpmv_usm.cpp b/tests/unit_tests/blas/level2/tpmv_usm.cpp index 04e0bae47..40722a09c 100644 --- a/tests/unit_tests/blas/level2/tpmv_usm.cpp +++ b/tests/unit_tests/blas/level2/tpmv_usm.cpp @@ -91,13 +91,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::tpmv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), x.data(), - incx, dependencies); + unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::tpmv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), x.data(), incx, - dependencies); + unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; default: break; } @@ -110,9 +110,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv, upper_lower, - transa, unit_nonunit, n, A.data(), x.data(), incx, - dependencies); + 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpmv, + upper_lower, transa, unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tpsv.cpp b/tests/unit_tests/blas/level2/tpsv.cpp index 2f7e0423c..daebf6d58 100644 --- a/tests/unit_tests/blas/level2/tpsv.cpp +++ b/tests/unit_tests/blas/level2/tpsv.cpp @@ -88,12 +88,12 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::tpsv(main_queue, upper_lower, transa, unit_nonunit, - n, A_buffer, x_buffer, incx); + oneapi::math::blas::column_major::tpsv(main_queue, upper_lower, transa, + unit_nonunit, n, A_buffer, x_buffer, incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::tpsv(main_queue, upper_lower, transa, unit_nonunit, n, - A_buffer, x_buffer, incx); + oneapi::math::blas::row_major::tpsv(main_queue, upper_lower, transa, unit_nonunit, + n, A_buffer, x_buffer, incx); break; default: break; } @@ -105,8 +105,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv, upper_lower, - transa, unit_nonunit, n, A_buffer, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv, + upper_lower, transa, unit_nonunit, n, A_buffer, x_buffer, + incx); break; default: break; } diff --git a/tests/unit_tests/blas/level2/tpsv_usm.cpp b/tests/unit_tests/blas/level2/tpsv_usm.cpp index f3980ed7f..a41d0e3a0 100644 --- a/tests/unit_tests/blas/level2/tpsv_usm.cpp +++ b/tests/unit_tests/blas/level2/tpsv_usm.cpp @@ -91,13 +91,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = 
oneapi::math::blas::column_major::tpsv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), x.data(), - incx, dependencies); + unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::tpsv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), x.data(), incx, - dependencies); + unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; default: break; } @@ -110,9 +110,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv, upper_lower, - transa, unit_nonunit, n, A.data(), x.data(), incx, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::tpsv, + upper_lower, transa, unit_nonunit, n, A.data(), x.data(), + incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/trmv.cpp b/tests/unit_tests/blas/level2/trmv.cpp index 0ea3371bd..1a99590cd 100644 --- a/tests/unit_tests/blas/level2/trmv.cpp +++ b/tests/unit_tests/blas/level2/trmv.cpp @@ -48,7 +48,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, int lda) { + oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, + int lda) { // Prepare data. 
vector x, x_ref, A; rand_vector(x, n, incx); @@ -88,12 +89,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::trmv(main_queue, upper_lower, transa, unit_nonunit, - n, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::column_major::trmv(main_queue, upper_lower, transa, + unit_nonunit, n, A_buffer, lda, x_buffer, + incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::trmv(main_queue, upper_lower, transa, unit_nonunit, n, - A_buffer, lda, x_buffer, incx); + oneapi::math::blas::row_major::trmv(main_queue, upper_lower, transa, unit_nonunit, + n, A_buffer, lda, x_buffer, incx); break; default: break; } @@ -105,8 +107,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x_buffer, incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv, upper_lower, - transa, unit_nonunit, n, A_buffer, lda, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv, + upper_lower, transa, unit_nonunit, n, A_buffer, lda, + x_buffer, incx); break; default: break; } diff --git a/tests/unit_tests/blas/level2/trmv_usm.cpp b/tests/unit_tests/blas/level2/trmv_usm.cpp index 1b7fd27c6..d11d9a68a 100644 --- a/tests/unit_tests/blas/level2/trmv_usm.cpp +++ b/tests/unit_tests/blas/level2/trmv_usm.cpp @@ -48,7 +48,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, int lda) { + oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, + int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { for (std::exception_ptr const& e : exceptions) { @@ -91,13 +92,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::trmv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, A.data(), lda, + x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::trmv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), lda, x.data(), - incx, dependencies); + unit_nonunit, n, A.data(), lda, x.data(), + incx, dependencies); break; default: break; } @@ -110,9 +111,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv, upper_lower, - transa, unit_nonunit, n, A.data(), lda, x.data(), incx, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trmv, + upper_lower, transa, unit_nonunit, n, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level2/trsv.cpp b/tests/unit_tests/blas/level2/trsv.cpp index f9279f597..05c64e97b 100644 --- a/tests/unit_tests/blas/level2/trsv.cpp +++ b/tests/unit_tests/blas/level2/trsv.cpp @@ -48,7 +48,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, int lda) { + oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, + int lda) { // Prepare data. 
vector x, x_ref, A; rand_vector(x, n, incx); @@ -88,12 +89,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - oneapi::math::blas::column_major::trsv(main_queue, upper_lower, transa, unit_nonunit, - n, A_buffer, lda, x_buffer, incx); + oneapi::math::blas::column_major::trsv(main_queue, upper_lower, transa, + unit_nonunit, n, A_buffer, lda, x_buffer, + incx); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::trsv(main_queue, upper_lower, transa, unit_nonunit, n, - A_buffer, lda, x_buffer, incx); + oneapi::math::blas::row_major::trsv(main_queue, upper_lower, transa, unit_nonunit, + n, A_buffer, lda, x_buffer, incx); break; default: break; } @@ -105,8 +107,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x_buffer, incx); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv, upper_lower, - transa, unit_nonunit, n, A_buffer, lda, x_buffer, incx); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv, + upper_lower, transa, unit_nonunit, n, A_buffer, lda, + x_buffer, incx); break; default: break; } diff --git a/tests/unit_tests/blas/level2/trsv_usm.cpp b/tests/unit_tests/blas/level2/trsv_usm.cpp index 53b8c1df7..98dbb5063 100644 --- a/tests/unit_tests/blas/level2/trsv_usm.cpp +++ b/tests/unit_tests/blas/level2/trsv_usm.cpp @@ -48,7 +48,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, int lda) { + oneapi::math::transpose transa, oneapi::math::diag unit_nonunit, int n, int incx, + int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { for (std::exception_ptr const& e : exceptions) { @@ -91,13 +92,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::trsv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), lda, - x.data(), incx, dependencies); + unit_nonunit, n, A.data(), lda, + x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::trsv(main_queue, upper_lower, transa, - unit_nonunit, n, A.data(), lda, x.data(), - incx, dependencies); + unit_nonunit, n, A.data(), lda, x.data(), + incx, dependencies); break; default: break; } @@ -110,9 +111,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe x.data(), incx, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv, upper_lower, - transa, unit_nonunit, n, A.data(), lda, x.data(), incx, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::trsv, + upper_lower, transa, unit_nonunit, n, A.data(), lda, + x.data(), incx, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level3/gemm.cpp b/tests/unit_tests/blas/level3/gemm.cpp index c8f44a433..0350cdb1a 100644 --- a/tests/unit_tests/blas/level3/gemm.cpp +++ b/tests/unit_tests/blas/level3/gemm.cpp @@ -99,13 +99,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::gemm(main_queue, transa, transb, m, n, k, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, C_buffer, + ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::gemm(main_queue, transa, transb, m, n, k, alpha, - A_buffer, lda, 
B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, C_buffer, + ldc); break; default: break; } diff --git a/tests/unit_tests/blas/level3/gemm_usm.cpp b/tests/unit_tests/blas/level3/gemm_usm.cpp index 82665e896..a18e79d79 100644 --- a/tests/unit_tests/blas/level3/gemm_usm.cpp +++ b/tests/unit_tests/blas/level3/gemm_usm.cpp @@ -99,13 +99,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::transpose trans switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::gemm(main_queue, transa, transb, m, n, k, - alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::gemm(main_queue, transa, transb, m, n, k, - alpha, A.data(), lda, B.data(), ldb, beta, - C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level3/hemm.cpp b/tests/unit_tests/blas/level3/hemm.cpp index fc7b25858..6886195cf 100644 --- a/tests/unit_tests/blas/level3/hemm.cpp +++ b/tests/unit_tests/blas/level3/hemm.cpp @@ -98,13 +98,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::hemm(main_queue, left_right, upper_lower, m, n, - alpha, A_buffer, lda, B_buffer, ldb, beta, - C_buffer, ldc); + alpha, A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n, + alpha, A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; default: break; } @@ -157,30 
+157,30 @@ TEST_P(HemmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } TEST_P(HemmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), 
std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(HemmTestSuite, HemmTests, diff --git a/tests/unit_tests/blas/level3/hemm_usm.cpp b/tests/unit_tests/blas/level3/hemm_usm.cpp index 6d3314720..f898dc002 100644 --- a/tests/unit_tests/blas/level3/hemm_usm.cpp +++ b/tests/unit_tests/blas/level3/hemm_usm.cpp @@ -96,14 +96,14 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::hemm(main_queue, left_right, upper_lower, m, - n, alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + done = oneapi::math::blas::column_major::hemm( + main_queue, left_right, upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m, n, - alpha, A.data(), lda, B.data(), ldb, beta, - C.data(), ldc, dependencies); + done = oneapi::math::blas::row_major::hemm(main_queue, left_right, upper_lower, m, + n, alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; default: break; } @@ -157,30 +157,30 @@ TEST_P(HemmUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, 
oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } TEST_P(HemmUsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + 
std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(HemmUsmTestSuite, HemmUsmTests, diff --git a/tests/unit_tests/blas/level3/her2k.cpp b/tests/unit_tests/blas/level3/her2k.cpp index 73dbfa9ee..9df00b280 100644 --- a/tests/unit_tests/blas/level3/her2k.cpp +++ b/tests/unit_tests/blas/level3/her2k.cpp @@ -99,13 +99,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::her2k(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::her2k(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, C_buffer, + ldc); break; default: break; } @@ -146,8 +146,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Her2kTests : public ::testing::TestWithParam> { -}; +class Her2kTests + : public ::testing::TestWithParam> {}; TEST_P(Her2kTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level3/her2k_usm.cpp b/tests/unit_tests/blas/level3/her2k_usm.cpp index 642fd6219..c975104cf 100644 --- a/tests/unit_tests/blas/level3/her2k_usm.cpp +++ b/tests/unit_tests/blas/level3/her2k_usm.cpp @@ -99,13 +99,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::her2k(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; case 
oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::her2k(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level3/herk.cpp b/tests/unit_tests/blas/level3/herk.cpp index eea240cd8..9c0e858b2 100644 --- a/tests/unit_tests/blas/level3/herk.cpp +++ b/tests/unit_tests/blas/level3/herk.cpp @@ -93,11 +93,11 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::herk(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, beta, C_buffer, ldc); + A_buffer, lda, beta, C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::herk(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, beta, C_buffer, ldc); + A_buffer, lda, beta, C_buffer, ldc); break; default: break; } @@ -109,8 +109,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe C_buffer, ldc); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk, upper_lower, - trans, n, k, alpha, A_buffer, lda, beta, C_buffer, ldc); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk, + upper_lower, trans, n, k, alpha, A_buffer, lda, beta, + C_buffer, ldc); break; default: break; } diff --git a/tests/unit_tests/blas/level3/herk_usm.cpp b/tests/unit_tests/blas/level3/herk_usm.cpp index c2c51b853..82e37cb22 100644 --- a/tests/unit_tests/blas/level3/herk_usm.cpp +++ b/tests/unit_tests/blas/level3/herk_usm.cpp @@ -94,13 +94,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::herk(main_queue, upper_lower, 
trans, n, k, - alpha, A.data(), lda, beta, C.data(), - ldc, dependencies); + alpha, A.data(), lda, beta, C.data(), + ldc, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::herk(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, beta, C.data(), ldc, - dependencies); + alpha, A.data(), lda, beta, C.data(), + ldc, dependencies); break; default: break; } @@ -113,9 +113,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk, upper_lower, - trans, n, k, alpha, A.data(), lda, beta, C.data(), ldc, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::herk, + upper_lower, trans, n, k, alpha, A.data(), lda, beta, + C.data(), ldc, dependencies); break; default: break; } diff --git a/tests/unit_tests/blas/level3/symm.cpp b/tests/unit_tests/blas/level3/symm.cpp index 13267aee0..0668adbff 100644 --- a/tests/unit_tests/blas/level3/symm.cpp +++ b/tests/unit_tests/blas/level3/symm.cpp @@ -98,13 +98,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::symm(main_queue, left_right, upper_lower, m, n, - alpha, A_buffer, lda, B_buffer, ldb, beta, - C_buffer, ldc); + alpha, A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; case oneapi::math::layout::row_major: - oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m, n, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m, n, + alpha, A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; default: break; } @@ -173,14 +173,14 @@ TEST_P(SymmTests, RealDoublePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 
101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, 101, - 102, 103, alpha, beta)); + oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, + 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, 101, - 102, 103, alpha, beta)); + oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, + 101, 102, 103, alpha, beta)); } TEST_P(SymmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); @@ -188,33 +188,33 @@ TEST_P(SymmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } TEST_P(SymmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex 
alpha(2.0, -0.5); std::complex beta(3.0, -1.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(SymmTestSuite, SymmTests, diff --git a/tests/unit_tests/blas/level3/symm_usm.cpp b/tests/unit_tests/blas/level3/symm_usm.cpp index 302dd1beb..dc8c00749 100644 --- a/tests/unit_tests/blas/level3/symm_usm.cpp +++ b/tests/unit_tests/blas/level3/symm_usm.cpp @@ -96,14 +96,14 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right #ifdef CALL_RT_API switch (layout) { case oneapi::math::layout::col_major: - done = oneapi::math::blas::column_major::symm(main_queue, left_right, upper_lower, m, - n, alpha, A.data(), lda, 
B.data(), ldb, - beta, C.data(), ldc, dependencies); + done = oneapi::math::blas::column_major::symm( + main_queue, left_right, upper_lower, m, n, alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: - done = oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m, n, - alpha, A.data(), lda, B.data(), ldb, beta, - C.data(), ldc, dependencies); + done = oneapi::math::blas::row_major::symm(main_queue, left_right, upper_lower, m, + n, alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; default: break; } @@ -173,14 +173,14 @@ TEST_P(SymmUsmTests, RealDoublePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, 101, - 102, 103, alpha, beta)); + oneapi::math::side::right, oneapi::math::uplo::lower, 72, 27, + 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, 101, - 102, 103, alpha, beta)); + oneapi::math::side::right, oneapi::math::uplo::upper, 72, 27, + 101, 102, 103, alpha, beta)); } TEST_P(SymmUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); @@ -188,33 +188,33 @@ TEST_P(SymmUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + 
std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } TEST_P(SymmUsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); std::complex beta(3.0, -1.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 72, 27, 101, 102, 103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, 72, 27, 101, 102, 
103, alpha, beta)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, 72, 27, 101, 102, 103, alpha, beta)); } INSTANTIATE_TEST_SUITE_P(SymmUsmTestSuite, SymmUsmTests, diff --git a/tests/unit_tests/blas/level3/syr2k.cpp b/tests/unit_tests/blas/level3/syr2k.cpp index e735f0ace..0ecb2ad6b 100644 --- a/tests/unit_tests/blas/level3/syr2k.cpp +++ b/tests/unit_tests/blas/level3/syr2k.cpp @@ -49,7 +49,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, fp beta) { + oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, + fp beta) { // Prepare data. vector> A, B, C, C_ref; rand_matrix(A, layout, trans, n, k, lda); @@ -94,13 +95,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, + C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, B_buffer, ldb, beta, C_buffer, - ldc); + A_buffer, lda, B_buffer, ldb, beta, C_buffer, + ldc); break; default: break; } @@ -141,8 +142,8 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe return (int)good; } -class Syr2kTests : public ::testing::TestWithParam> { -}; +class Syr2kTests + : public ::testing::TestWithParam> {}; TEST_P(Syr2kTests, RealSinglePrecision) { float alpha(3.0); @@ -172,11 +173,11 @@ TEST_P(Syr2kTests, RealDoublePrecision) { oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), 
std::get<1>(GetParam()), - oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27, - 101, 102, 103, alpha, beta)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, + 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27, - 101, 102, 103, alpha, beta)); + oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, + 27, 101, 102, 103, alpha, beta)); } TEST_P(Syr2kTests, ComplexSinglePrecision) { std::complex alpha(3.0, -0.5); diff --git a/tests/unit_tests/blas/level3/syr2k_usm.cpp b/tests/unit_tests/blas/level3/syr2k_usm.cpp index 9a5899c6c..fd51bda52 100644 --- a/tests/unit_tests/blas/level3/syr2k_usm.cpp +++ b/tests/unit_tests/blas/level3/syr2k_usm.cpp @@ -48,7 +48,8 @@ namespace { template int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lower, - oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, fp beta) { + oneapi::math::transpose trans, int n, int k, int lda, int ldb, int ldc, fp alpha, + fp beta) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { for (std::exception_ptr const& e : exceptions) { @@ -94,13 +95,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::syr2k(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::syr2k(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, B.data(), ldb, - beta, C.data(), ldc, dependencies); + alpha, A.data(), lda, B.data(), ldb, + beta, C.data(), ldc, dependencies); break; default: break; } @@ -173,11 +174,11 @@ TEST_P(Syr2kUsmTests, RealDoublePrecision) { oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73, 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27, - 101, 102, 103, alpha, beta)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, + 27, 101, 102, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27, - 101, 102, 103, alpha, beta)); + oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, + 27, 101, 102, 103, alpha, beta)); } TEST_P(Syr2kUsmTests, ComplexSinglePrecision) { std::complex alpha(3.0, -0.5); diff --git a/tests/unit_tests/blas/level3/syrk.cpp b/tests/unit_tests/blas/level3/syrk.cpp index c0c281531..928aaed84 100644 --- a/tests/unit_tests/blas/level3/syrk.cpp +++ b/tests/unit_tests/blas/level3/syrk.cpp @@ -92,11 +92,11 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case oneapi::math::layout::col_major: 
oneapi::math::blas::column_major::syrk(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, beta, C_buffer, ldc); + A_buffer, lda, beta, C_buffer, ldc); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::syrk(main_queue, upper_lower, trans, n, k, alpha, - A_buffer, lda, beta, C_buffer, ldc); + A_buffer, lda, beta, C_buffer, ldc); break; default: break; } @@ -108,8 +108,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe C_buffer, ldc); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk, upper_lower, - trans, n, k, alpha, A_buffer, lda, beta, C_buffer, ldc); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk, + upper_lower, trans, n, k, alpha, A_buffer, lda, beta, + C_buffer, ldc); break; default: break; } @@ -167,11 +168,11 @@ TEST_P(SyrkTests, RealDoublePrecision) { oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27, - 101, 103, alpha, beta)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, + 27, 101, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27, - 101, 103, alpha, beta)); + oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, + 27, 101, 103, alpha, beta)); } TEST_P(SyrkTests, ComplexSinglePrecision) { std::complex alpha(3.0, -0.5); diff --git a/tests/unit_tests/blas/level3/syrk_usm.cpp b/tests/unit_tests/blas/level3/syrk_usm.cpp index 6e912a9ac..2771dd2b7 100644 --- a/tests/unit_tests/blas/level3/syrk_usm.cpp +++ b/tests/unit_tests/blas/level3/syrk_usm.cpp @@ -92,13 +92,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe switch (layout) { case 
oneapi::math::layout::col_major: done = oneapi::math::blas::column_major::syrk(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, beta, C.data(), - ldc, dependencies); + alpha, A.data(), lda, beta, C.data(), + ldc, dependencies); break; case oneapi::math::layout::row_major: done = oneapi::math::blas::row_major::syrk(main_queue, upper_lower, trans, n, k, - alpha, A.data(), lda, beta, C.data(), ldc, - dependencies); + alpha, A.data(), lda, beta, C.data(), + ldc, dependencies); break; default: break; } @@ -111,9 +111,9 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::uplo upper_lowe C.data(), ldc, dependencies); break; case oneapi::math::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk, upper_lower, - trans, n, k, alpha, A.data(), lda, beta, C.data(), ldc, - dependencies); + TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::math::blas::row_major::syrk, + upper_lower, trans, n, k, alpha, A.data(), lda, beta, + C.data(), ldc, dependencies); break; default: break; } @@ -171,11 +171,11 @@ TEST_P(SyrkUsmTests, RealDoublePrecision) { oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, 73, 27, 101, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, 27, - 101, 103, alpha, beta)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, 73, + 27, 101, 103, alpha, beta)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, 27, - 101, 103, alpha, beta)); + oneapi::math::uplo::upper, oneapi::math::transpose::trans, 73, + 27, 101, 103, alpha, beta)); } TEST_P(SyrkUsmTests, ComplexSinglePrecision) { std::complex alpha(3.0, -0.5); diff --git a/tests/unit_tests/blas/level3/trmm.cpp b/tests/unit_tests/blas/level3/trmm.cpp index 1dbaa4820..86d127cce 100644 --- a/tests/unit_tests/blas/level3/trmm.cpp +++ 
b/tests/unit_tests/blas/level3/trmm.cpp @@ -98,13 +98,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::trmm(main_queue, left_right, upper_lower, transa, - unit_nonunit, m, n, alpha, A_buffer, lda, - B_buffer, ldb); + unit_nonunit, m, n, alpha, A_buffer, lda, + B_buffer, ldb); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::trmm(main_queue, left_right, upper_lower, transa, - unit_nonunit, m, n, alpha, A_buffer, lda, - B_buffer, ldb); + unit_nonunit, m, n, alpha, A_buffer, lda, + B_buffer, ldb); break; default: break; } @@ -152,44 +152,44 @@ TEST_P(TrmmTests, RealSinglePrecision) { float alpha(2.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 
101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); } TEST_P(TrmmTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); @@ -205,20 
+205,20 @@ TEST_P(TrmmTests, RealDoublePrecision) { 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, @@ -242,119 +242,119 @@ TEST_P(TrmmTests, ComplexSinglePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>( - 
std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - 
oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } TEST_P(TrmmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), 
std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); 
EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } INSTANTIATE_TEST_SUITE_P(TrmmTestSuite, TrmmTests, diff --git a/tests/unit_tests/blas/level3/trmm_usm.cpp b/tests/unit_tests/blas/level3/trmm_usm.cpp index 0e885b37e..20469d752 100644 --- a/tests/unit_tests/blas/level3/trmm_usm.cpp +++ b/tests/unit_tests/blas/level3/trmm_usm.cpp @@ -154,44 +154,44 @@ TEST_P(TrmmUsmTests, RealSinglePrecision) { float alpha(2.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, 
oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, 
oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); } TEST_P(TrmmUsmTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); @@ -207,20 +207,20 @@ TEST_P(TrmmUsmTests, RealDoublePrecision) { 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 
72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, @@ -244,119 +244,119 @@ TEST_P(TrmmUsmTests, ComplexSinglePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( 
std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, 
alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } TEST_P(TrmmUsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, 
- oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, 
oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + 
oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } INSTANTIATE_TEST_SUITE_P(TrmmUsmTestSuite, TrmmUsmTests, diff --git a/tests/unit_tests/blas/level3/trsm.cpp b/tests/unit_tests/blas/level3/trsm.cpp index 0eac2ff36..c10c4c2a5 100644 --- a/tests/unit_tests/blas/level3/trsm.cpp +++ b/tests/unit_tests/blas/level3/trsm.cpp @@ -98,13 +98,13 @@ int test(device* dev, oneapi::math::layout layout, oneapi::math::side left_right switch (layout) { case oneapi::math::layout::col_major: oneapi::math::blas::column_major::trsm(main_queue, left_right, upper_lower, transa, - unit_nonunit, m, n, alpha, A_buffer, lda, - B_buffer, ldb); + unit_nonunit, m, n, alpha, A_buffer, lda, + B_buffer, ldb); break; case oneapi::math::layout::row_major: oneapi::math::blas::row_major::trsm(main_queue, left_right, upper_lower, transa, - unit_nonunit, m, n, alpha, A_buffer, lda, - B_buffer, ldb); + unit_nonunit, m, n, alpha, A_buffer, lda, + B_buffer, ldb); break; default: break; } @@ -152,20 +152,20 @@ TEST_P(TrsmTests, RealSinglePrecision) { float alpha(2.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, 
oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, @@ -184,36 +184,36 @@ TEST_P(TrsmTests, RealSinglePrecision) { 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, 
oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); } TEST_P(TrsmTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); @@ -253,20 +253,20 @@ TEST_P(TrsmTests, RealDoublePrecision) { 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 
102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, @@ -290,199 +290,199 @@ TEST_P(TrsmTests, ComplexSinglePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, + 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, + 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, 
oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, 
- oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } TEST_P(TrsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - 
EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); 
- EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 
+ oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 
102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, 
oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } INSTANTIATE_TEST_SUITE_P(TrsmTestSuite, TrsmTests, diff --git a/tests/unit_tests/blas/level3/trsm_usm.cpp b/tests/unit_tests/blas/level3/trsm_usm.cpp index 86b958d82..0f2247a14 100644 --- a/tests/unit_tests/blas/level3/trsm_usm.cpp +++ b/tests/unit_tests/blas/level3/trsm_usm.cpp @@ -155,20 +155,20 @@ TEST_P(TrsmUsmTests, RealSinglePrecision) { float alpha(2.0); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, 
oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, @@ -187,36 +187,36 @@ TEST_P(TrsmUsmTests, RealSinglePrecision) { 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); } TEST_P(TrsmUsmTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); @@ -256,20 +256,20 @@ TEST_P(TrsmUsmTests, RealDoublePrecision) { 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); 
EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, @@ -293,199 +293,199 @@ TEST_P(TrsmUsmTests, ComplexSinglePrecision) { oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), 
std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, + 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, + 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), 
std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>( - std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, 
oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, - 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, 
oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, - 27, 101, 102, alpha)); + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } TEST_P(TrsmUsmTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); std::complex alpha(2.0, -0.5); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::nontrans, - 
oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::trans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::lower, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::left, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); - EXPECT_TRUEORSKIP(test>(std::get<0>(GetParam()), std::get<1>(GetParam()), - oneapi::math::side::right, oneapi::math::uplo::upper, - oneapi::math::transpose::conjtrans, - oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, 
oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, 
oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, 27, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); EXPECT_TRUEORSKIP(test>( std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, - oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, 72, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::unit, 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, 
oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::nontrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::trans, oneapi::math::diag::nonunit, 72, + 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::left, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::lower, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), 
oneapi::math::side::left, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); + EXPECT_TRUEORSKIP(test>( + std::get<0>(GetParam()), std::get<1>(GetParam()), oneapi::math::side::right, + oneapi::math::uplo::upper, oneapi::math::transpose::conjtrans, oneapi::math::diag::nonunit, + 72, 27, 101, 102, alpha)); } INSTANTIATE_TEST_SUITE_P(TrsmUsmTestSuite, TrsmUsmTests, diff --git a/tests/unit_tests/dft/include/compute_inplace.hpp b/tests/unit_tests/dft/include/compute_inplace.hpp index a423476fe..bd11ed9d4 100644 --- a/tests/unit_tests/dft/include/compute_inplace.hpp +++ b/tests/unit_tests/dft/include/compute_inplace.hpp @@ -94,14 +94,17 @@ int DFT_Test::test_in_place_buffer() { auto acc_host = inout_buf.get_host_access(); auto ptr_host = reinterpret_cast(acc_host.get_pointer()); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - ptr_host + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - modified_strides_bwd, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::math::dft::domain::REAL > + (ptr_host + backward_distance * i, + out_host_ref.data() + ref_distance * i, sizes, + modified_strides_bwd, abs_error_margin, rel_error_margin, + std::cout)); } } oneapi::math::dft::compute_backward, - FwdInputType>(descriptor, inout_buf); + FwdInputType>(descriptor, inout_buf); } std::vector fwd_data_ref = input; @@ -184,19 +187,21 @@ int DFT_Test::test_in_place_USM() { std::vector no_dependencies; oneapi::math::dft::compute_forward(descriptor, inout.data(), - no_dependencies) + no_dependencies) .wait_and_throw(); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - reinterpret_cast(inout.data()) + backward_distance * i, - out_host_ref.data() + ref_distance * i, sizes, modified_strides_bwd, abs_error_margin, - rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain 
== + oneapi::math::dft::domain::REAL > + (reinterpret_cast(inout.data()) + backward_distance * i, + out_host_ref.data() + ref_distance * i, sizes, modified_strides_bwd, + abs_error_margin, rel_error_margin, std::cout)); } sycl::event done = oneapi::math::dft::compute_backward, - FwdInputType>(descriptor, inout.data(), no_dependencies); + FwdInputType>(descriptor, inout.data(), + no_dependencies); done.wait_and_throw(); std::for_each(input.begin(), input.end(), diff --git a/tests/unit_tests/dft/include/compute_inplace_real_real.hpp b/tests/unit_tests/dft/include/compute_inplace_real_real.hpp index 0d9d726ee..263cc622f 100644 --- a/tests/unit_tests/dft/include/compute_inplace_real_real.hpp +++ b/tests/unit_tests/dft/include/compute_inplace_real_real.hpp @@ -66,8 +66,8 @@ int DFT_Test::test_in_place_real_real_USM() { abs_error_margin, rel_error_margin, std::cout)); oneapi::math::dft::compute_backward, - PrecisionType>(descriptor, inout_re.data(), - inout_im.data(), no_dependencies) + PrecisionType>(descriptor, inout_re.data(), + inout_im.data(), no_dependencies) .wait_and_throw(); for (std::size_t i = 0; i < output_data.size(); ++i) { @@ -118,7 +118,7 @@ int DFT_Test::test_in_place_real_real_buffer() { sycl::range<1>(size_total) }; oneapi::math::dft::compute_forward(descriptor, inout_re_buf, - inout_im_buf); + inout_im_buf); { auto acc_inout_re = inout_re_buf.get_host_access(); @@ -133,7 +133,7 @@ int DFT_Test::test_in_place_real_real_buffer() { } oneapi::math::dft::compute_backward, - PrecisionType>(descriptor, inout_re_buf, inout_im_buf); + PrecisionType>(descriptor, inout_re_buf, inout_im_buf); { auto acc_inout_re = inout_re_buf.get_host_access(); diff --git a/tests/unit_tests/dft/include/compute_out_of_place.hpp b/tests/unit_tests/dft/include/compute_out_of_place.hpp index 30b5cb5e1..284a68e80 100644 --- a/tests/unit_tests/dft/include/compute_out_of_place.hpp +++ b/tests/unit_tests/dft/include/compute_out_of_place.hpp @@ -77,20 +77,22 @@ int 
DFT_Test::test_out_of_place_buffer() { auto acc_bwd = bwd_buf.get_host_access(); auto bwd_ptr = acc_bwd.get_pointer(); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::math::dft::domain::REAL > + (bwd_ptr + backward_distance * i, + out_host_ref.data() + ref_distance * i, sizes, strides_bwd_cpy, + abs_error_margin, rel_error_margin, std::cout)); } } oneapi::math::dft::compute_backward, - FwdOutputType, FwdInputType>(descriptor, bwd_buf, - fwd_buf); + FwdOutputType, FwdInputType>(descriptor, bwd_buf, + fwd_buf); } // account for scaling that occurs during DFT std::for_each(input.begin(), input.end(), - [this](auto &x) { x *= static_cast(forward_elements); }); + [this](auto& x) { x *= static_cast(forward_elements); }); for (std::int64_t i = 0; i < batches; i++) { EXPECT_TRUE(check_equal_strided( @@ -164,19 +166,20 @@ int DFT_Test::test_out_of_place_USM() { auto bwd_ptr = &bwd[0]; for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::math::dft::domain::REAL > + (bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, + sizes, strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); } - oneapi::math::dft::compute_backward, FwdOutputType, - FwdInputType>(descriptor, bwd.data(), fwd.data(), - no_dependencies) + oneapi::math::dft::compute_backward, + FwdOutputType, FwdInputType>(descriptor, bwd.data(), + fwd.data(), no_dependencies) .wait_and_throw(); // account for scaling that occurs during DFT std::for_each(input.begin(), input.end(), - [this](auto &x) { x *= 
static_cast(forward_elements); }); + [this](auto& x) { x *= static_cast(forward_elements); }); for (std::int64_t i = 0; i < batches; i++) { EXPECT_TRUE(check_equal_strided( diff --git a/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp b/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp index b1af7e560..9f756f5cf 100644 --- a/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp +++ b/tests/unit_tests/dft/include/compute_out_of_place_real_real.hpp @@ -74,7 +74,7 @@ int DFT_Test::test_out_of_place_real_real_USM() { abs_error_margin, rel_error_margin, std::cout)); oneapi::math::dft::compute_backward, - PrecisionType, PrecisionType>( + PrecisionType, PrecisionType>( descriptor, out_re.data(), out_im.data(), out_back_re.data(), out_back_im.data(), no_dependencies) .wait_and_throw(); @@ -139,7 +139,7 @@ int DFT_Test::test_out_of_place_real_real_buffer() { } oneapi::math::dft::compute_backward, - PrecisionType, PrecisionType>( + PrecisionType, PrecisionType>( descriptor, out_dev_re, out_dev_im, out_back_dev_re, out_back_dev_im); { diff --git a/tests/unit_tests/dft/include/reference_dft.hpp b/tests/unit_tests/dft/include/reference_dft.hpp index 661829282..5c2abcc21 100644 --- a/tests/unit_tests/dft/include/reference_dft.hpp +++ b/tests/unit_tests/dft/include/reference_dft.hpp @@ -32,7 +32,7 @@ namespace detail { using ref_t = long double; /* Do the calculations using long double */ template -void reference_forward_dft_impl(const TypeIn *in, TypeOut *out, std::size_t N, std::size_t stride) { +void reference_forward_dft_impl(const TypeIn* in, TypeOut* out, std::size_t N, std::size_t stride) { static_assert(is_complex(), "Output type of DFT must be complex"); constexpr ref_t TWOPI = 2.0L * 3.141592653589793238462643383279502884197L; @@ -54,14 +54,14 @@ struct reference {}; template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, 
const TypeIn* in, TypeOut* out) { reference_forward_dft_impl(in, out, sizes[0], 1); } }; template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { const auto elements = std::accumulate(sizes.begin(), sizes.end(), 1U, std::multiplies<>{}); std::vector> tmp(elements); for (std::size_t i = 0; i < elements; i += sizes[1]) { @@ -75,7 +75,7 @@ struct reference { template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { const auto elements = std::accumulate(sizes.begin(), sizes.end(), 1U, std::multiplies<>{}); std::vector> tmp1(elements); std::vector> tmp2(elements); @@ -112,7 +112,7 @@ struct reference { * @param stride the stride between elements in the data set, measured in elements. **/ template -void reference_forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { +void reference_forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { std::vector unsigned_sizes(sizes.size()); std::transform(sizes.begin(), sizes.end(), unsigned_sizes.begin(), [](std::int64_t size) { return cast_unsigned(size); }); diff --git a/tests/unit_tests/dft/include/test_common.hpp b/tests/unit_tests/dft/include/test_common.hpp index 70f60d363..5b1647e94 100644 --- a/tests/unit_tests/dft/include/test_common.hpp +++ b/tests/unit_tests/dft/include/test_common.hpp @@ -58,7 +58,7 @@ inline std::size_t cast_unsigned(std::int64_t i) { } template -bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std::ostream &out) { +bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std::ostream& out) { using fp_real = typename complex_info::real_type; static_assert(std::is_floating_point_v, "Expected floating-point real or complex type."); @@ -88,8 
+88,8 @@ bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std } template -bool check_equal_vector(vec1 &&v, vec2 &&v_ref, std::size_t n, double abs_error_mag, - double rel_error_mag, std::ostream &out) { +bool check_equal_vector(vec1&& v, vec2&& v_ref, std::size_t n, double abs_error_mag, + double rel_error_mag, std::ostream& out) { constexpr int max_print = 20; int count = 0; bool good = true; @@ -131,7 +131,7 @@ inline t rand_scalar() { } template -void rand_vector(vec &v, std::size_t n) { +void rand_vector(vec& v, std::size_t n) { using fp = typename vec::value_type; v.resize(n); for (std::size_t i = 0; i < n; i++) { @@ -141,7 +141,7 @@ void rand_vector(vec &v, std::size_t n) { // Catch asynchronous exceptions. auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } @@ -153,7 +153,7 @@ auto exception_handler = [](sycl::exception_list exceptions) { }; template -void commit_descriptor(oneapi::math::dft::descriptor &descriptor, +void commit_descriptor(oneapi::math::dft::descriptor& descriptor, sycl::queue queue) { #ifdef CALL_RT_API descriptor.commit(queue); @@ -164,7 +164,7 @@ void commit_descriptor(oneapi::math::dft::descriptor &descrip // is it assumed that the unused elements of the array are ignored inline std::array get_conjugate_even_complex_strides( - const std::vector &sizes) { + const std::vector& sizes) { switch (sizes.size()) { case 1: return { 0, 1 }; case 2: return { 0, sizes[1] / 2 + 1, 1 }; @@ -178,7 +178,7 @@ inline std::array get_conjugate_even_complex_strides( } // is it assumed that the unused elements of the array are ignored -inline std::array get_default_strides(const std::vector &sizes) { +inline std::array get_default_strides(const std::vector& sizes) { if (sizes.size() > 3) { throw oneapi::math::unimplemented( "dft/test_common", __FUNCTION__, @@ -207,8 +207,8 @@ T 
get_default(const std::vector vec, std::size_t idx, T default_) { template std::pair get_default_distances( - const std::vector &sizes, const std::vector &strides_fwd, - const std::vector &strides_bwd) { + const std::vector& sizes, const std::vector& strides_fwd, + const std::vector& strides_bwd) { std::int64_t size0 = sizes[0]; std::int64_t size1 = get_default(sizes, 1, 1l); std::int64_t size2 = get_default(sizes, 2, 1l); @@ -241,8 +241,8 @@ std::pair get_default_distances( //up to 3 dimensions, empty strides = default template > std::vector strided_copy( - const T_vec &contiguous, const std::vector &sizes, - const std::vector &strides, std::int64_t batches, std::int64_t distance, + const T_vec& contiguous, const std::vector& sizes, + const std::vector& strides, std::int64_t batches, std::int64_t distance, Allocator alloc = {}) { if (strides.size() == 0) { return { contiguous.begin(), contiguous.end(), alloc }; @@ -273,9 +273,9 @@ std::vector strided_copy( //up to 3 dimensions, empty strides = default template -bool check_equal_strided(const vec1 &v, const vec2 &v_ref, std::vector sizes, +bool check_equal_strided(const vec1& v, const vec2& v_ref, std::vector sizes, std::vector strides, double abs_error_mag, double rel_error_mag, - std::ostream &out) { + std::ostream& out) { if (strides.size() == 0) { std::array strides_arr; if constexpr (ConjugateEvenStrides) { @@ -344,8 +344,7 @@ struct DFTParams { class DFTParamsPrint { public: - std::string operator()( - testing::TestParamInfo> dev) const { + std::string operator()(testing::TestParamInfo> dev) const { auto [device, params] = dev.param; std::string info_name; @@ -377,7 +376,7 @@ class DFTParamsPrint { info_name.append("_batches_").append(std::to_string(params.batches)); std::string dev_name = device->get_info(); - std::for_each(dev_name.begin(), dev_name.end(), [](auto &c) { + std::for_each(dev_name.begin(), dev_name.end(), [](auto& c) { if (!isalnum(c)) c = '_'; }); diff --git 
a/tests/unit_tests/dft/source/compute_tests.cpp b/tests/unit_tests/dft/source/compute_tests.cpp index 6375a5e50..349ba2bd7 100644 --- a/tests/unit_tests/dft/source/compute_tests.cpp +++ b/tests/unit_tests/dft/source/compute_tests.cpp @@ -35,41 +35,41 @@ #include "compute_out_of_place.hpp" #include "compute_out_of_place_real_real.hpp" -extern std::vector devices; +extern std::vector devices; namespace { class ComputeTests_in_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_in_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_out_of_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_out_of_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_in_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_in_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_out_of_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_out_of_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; #define INSTANTIATE_TEST(PRECISION, DOMAIN, PLACE, LAYOUT, STORAGE) \ TEST_P(ComputeTests##_##LAYOUT##PLACE##_##DOMAIN, \ DOMAIN##_##PRECISION##_##PLACE##_##LAYOUT##STORAGE) { \ try { \ - auto test = DFT_Test{ \ + auto test = DFT_Test{ \ std::get<0>(GetParam()), std::get<1>(GetParam()).sizes, \ std::get<1>(GetParam()).strides_fwd, std::get<1>(GetParam()).strides_bwd, \ std::get<1>(GetParam()).batches \ }; \ EXPECT_TRUEORSKIP(test.test_##PLACE##_##LAYOUT##STORAGE()); \ } \ - catch (oneapi::math::unimplemented & e) { \ + catch (oneapi::math::unimplemented & e) { \ std::cout << "Skipping test because: 
\"" << e.what() << "\"" << std::endl; \ GTEST_SKIP(); \ } \ diff --git a/tests/unit_tests/dft/source/descriptor_tests.cpp b/tests/unit_tests/dft/source/descriptor_tests.cpp index da1f5c487..a4290e553 100644 --- a/tests/unit_tests/dft/source/descriptor_tests.cpp +++ b/tests/unit_tests/dft/source/descriptor_tests.cpp @@ -141,11 +141,14 @@ static void set_and_get_io_strides() { descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES, input_strides_before_set.data()); EXPECT_EQ(std::vector(strides_size, 0), input_strides_before_set); - descriptor.set_value(oneapi::math::dft::config_param::INPUT_STRIDES, input_strides_value.data()); + descriptor.set_value(oneapi::math::dft::config_param::INPUT_STRIDES, + input_strides_value.data()); descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES, input_strides_after_set.data()); - descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, fwd_strides_after_set.data()); - descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, bwd_strides_after_set.data()); + descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, + fwd_strides_after_set.data()); + descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, + bwd_strides_after_set.data()); EXPECT_EQ(input_strides_value, input_strides_after_set); EXPECT_EQ(std::vector(strides_size, 0), fwd_strides_after_set); EXPECT_EQ(std::vector(strides_size, 0), bwd_strides_after_set); @@ -206,8 +209,10 @@ static void set_and_get_fwd_bwd_strides() { descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, fwd_strides_before_set.data()); EXPECT_EQ(fwd_strides_default_value, fwd_strides_before_set); - descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES, fwd_strides_new_value.data()); - descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, fwd_strides_after_set.data()); + descriptor.set_value(oneapi::math::dft::config_param::FWD_STRIDES, + fwd_strides_new_value.data()); + 
descriptor.get_value(oneapi::math::dft::config_param::FWD_STRIDES, + fwd_strides_after_set.data()); descriptor.get_value(oneapi::math::dft::config_param::INPUT_STRIDES, input_strides_after_set.data()); descriptor.get_value(oneapi::math::dft::config_param::OUTPUT_STRIDES, @@ -221,8 +226,10 @@ static void set_and_get_fwd_bwd_strides() { descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, bwd_strides_before_set.data()); EXPECT_EQ(bwd_strides_default_value, bwd_strides_before_set); - descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES, bwd_strides_new_value.data()); - descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, bwd_strides_after_set.data()); + descriptor.set_value(oneapi::math::dft::config_param::BWD_STRIDES, + bwd_strides_new_value.data()); + descriptor.get_value(oneapi::math::dft::config_param::BWD_STRIDES, + bwd_strides_after_set.data()); EXPECT_EQ(bwd_strides_new_value, bwd_strides_after_set); } #pragma clang diagnostic pop @@ -289,7 +296,8 @@ static void set_and_get_values() { &fwd_distance_before_set); EXPECT_EQ(1, fwd_distance_before_set); descriptor.set_value(oneapi::math::dft::config_param::FWD_DISTANCE, fwd_distance_set_value); - descriptor.get_value(oneapi::math::dft::config_param::FWD_DISTANCE, &fwd_distance_after_set); + descriptor.get_value(oneapi::math::dft::config_param::FWD_DISTANCE, + &fwd_distance_after_set); EXPECT_EQ(fwd_distance_set_value, fwd_distance_after_set); std::int64_t bwd_distance_set_value{ domain == oneapi::math::dft::domain::REAL @@ -302,7 +310,8 @@ static void set_and_get_values() { &bwd_distance_before_set); EXPECT_EQ(1, bwd_distance_before_set); descriptor.set_value(oneapi::math::dft::config_param::BWD_DISTANCE, bwd_distance_set_value); - descriptor.get_value(oneapi::math::dft::config_param::BWD_DISTANCE, &bwd_distance_after_set); + descriptor.get_value(oneapi::math::dft::config_param::BWD_DISTANCE, + &bwd_distance_after_set); EXPECT_EQ(bwd_distance_set_value, 
bwd_distance_after_set); } @@ -717,11 +726,13 @@ int test_commit(sycl::device* dev) { } TEST(DescriptorTests, DescriptorMoveRealSingle) { - EXPECT_TRUE((test_move())); + EXPECT_TRUE( + (test_move())); } TEST(DescriptorTests, DescriptorMoveRealDouble) { - EXPECT_TRUE((test_move())); + EXPECT_TRUE( + (test_move())); } TEST(DescriptorTests, DescriptorMoveComplexSingle) { @@ -735,13 +746,13 @@ TEST(DescriptorTests, DescriptorMoveComplexDouble) { } TEST(DescriptorTests, DescriptorTestsRealSingle) { - EXPECT_TRUE(( - test_getter_setter())); + EXPECT_TRUE((test_getter_setter())); } TEST(DescriptorTests, DescriptorTestsRealDouble) { - EXPECT_TRUE(( - test_getter_setter())); + EXPECT_TRUE((test_getter_setter())); } TEST(DescriptorTests, DescriptorTestsComplexSingle) { diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp index 60702adf5..9a34b3f9f 100644 --- a/tests/unit_tests/include/test_helper.hpp +++ b/tests/unit_tests/include/test_helper.hpp @@ -271,11 +271,11 @@ } \ } while (0); -void print_error_code(sycl::exception const &e); +void print_error_code(sycl::exception const& e); class DeviceNamePrint { public: - std::string operator()(testing::TestParamInfo dev) const { + std::string operator()(testing::TestParamInfo dev) const { std::string dev_name = dev.param->get_info(); for (std::string::size_type i = 0; i < dev_name.size(); ++i) { if (!isalnum(dev_name[i])) @@ -290,9 +290,10 @@ class DeviceNamePrint { class LayoutDeviceNamePrint { public: std::string operator()( - testing::TestParamInfo> dev) const { - std::string layout_name = - std::get<1>(dev.param) == oneapi::math::layout::col_major ? "Column_Major" : "Row_Major"; + testing::TestParamInfo> dev) const { + std::string layout_name = std::get<1>(dev.param) == oneapi::math::layout::col_major + ? 
"Column_Major" + : "Row_Major"; std::string dev_name = std::get<0>(dev.param)->get_info(); for (std::string::size_type i = 0; i < dev_name.size(); ++i) { if (!isalnum(dev_name[i])) @@ -308,7 +309,7 @@ class LayoutDeviceNamePrint { namespace oneapi { namespace math { -static inline void *aligned_alloc(size_t align, size_t size) { +static inline void* aligned_alloc(size_t align, size_t size) { #ifdef _WIN64 return ::_aligned_malloc(size, align); #else @@ -316,7 +317,7 @@ static inline void *aligned_alloc(size_t align, size_t size) { #endif } -static inline void aligned_free(void *p) { +static inline void aligned_free(void* p) { #ifdef _WIN64 ::_aligned_free(p); #else @@ -325,7 +326,7 @@ static inline void aligned_free(void *p) { } /* Support for Unified Shared Memory allocations for different backends */ -static inline void *malloc_shared(size_t align, size_t size, sycl::device dev, sycl::context ctx) { +static inline void* malloc_shared(size_t align, size_t size, sycl::device dev, sycl::context ctx) { (void)align; #ifdef _WIN64 return sycl::malloc_shared(size, dev, ctx); @@ -339,7 +340,7 @@ static inline void *malloc_shared(size_t align, size_t size, sycl::device dev, s #endif } -static inline void *malloc_device(size_t align, size_t size, sycl::device dev, sycl::context ctx) { +static inline void* malloc_device(size_t align, size_t size, sycl::device dev, sycl::context ctx) { (void)align; #ifdef _WIN64 return sycl::malloc_device(size, dev, ctx); @@ -353,11 +354,11 @@ static inline void *malloc_device(size_t align, size_t size, sycl::device dev, s #endif } -static inline void free_shared(void *p, sycl::context ctx) { +static inline void free_shared(void* p, sycl::context ctx) { sycl::free(p, ctx); } -static inline void free_usm(void *p, sycl::context ctx) { +static inline void free_usm(void* p, sycl::context ctx) { sycl::free(p, ctx); } diff --git a/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp 
b/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp index 8cf2ade94..c379e5d25 100644 --- a/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp +++ b/tests/unit_tests/lapack/include/lapack_accuracy_checks.hpp @@ -206,8 +206,9 @@ bool check_gerqf_accuracy(const std::vector& A, const std::vector& A_ini std::vector tau2(n); for (int64_t i = 0; i < std::min(m, n); i++) tau2[n - m + i] = tau[i]; - auto info = reference::or_un_mrq(oneapi::math::side::right, oneapi::math::transpose::nontrans, - m, n, n, Q.data(), ldq, tau2.data(), R.data(), ldr); + auto info = + reference::or_un_mrq(oneapi::math::side::right, oneapi::math::transpose::nontrans, m, n, + n, Q.data(), ldq, tau2.data(), R.data(), ldr); if (0 != info) { test_log::lout << "reference ormqr/unmqr failed with info = " << info << std::endl; return false; @@ -395,8 +396,9 @@ bool check_or_un_gbr_accuracy(oneapi::math::generate vect, int64_t m, int64_t n, /* | I - Q'Q | < m O(eps) */ std::vector QQ(cols_Q * cols_Q); int64_t ldqq = cols_Q; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, cols_Q, - cols_Q, rows_Q, 1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), ldqq); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + cols_Q, cols_Q, rows_Q, 1.0, Q.data(), ldq, Q.data(), ldq, 0.0, QQ.data(), + ldqq); if (!rel_id_err_check(cols_Q, QQ, ldqq)) { test_log::lout << "Q Orthogonality check failed" << std::endl; result = false; @@ -411,8 +413,9 @@ bool check_or_un_gbr_accuracy(oneapi::math::generate vect, int64_t m, int64_t n, /* | I - (P')(P')' | < m O(eps) */ std::vector PP(rows_P * rows_P); int64_t ldpp = rows_P; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, rows_P, - rows_P, cols_P, 1.0, P.data(), ldp, P.data(), ldp, 0.0, PP.data(), ldpp); + reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, + rows_P, rows_P, cols_P, 1.0, P.data(), ldp, P.data(), 
ldp, 0.0, PP.data(), + ldpp); if (!rel_id_err_check(rows_P, PP, ldpp)) { test_log::lout << "P^t Orthogonality check failed" << std::endl; result = false; @@ -490,15 +493,15 @@ bool check_potrf_accuracy(const std::vector& init, const std::vector& so } template -bool check_potrs_accuracy(oneapi::math::uplo uplo, int64_t n, int64_t nrhs, const std::vector& B, - int64_t ldb, std::vector A_initial, int64_t lda, - std::vector B_initial) { +bool check_potrs_accuracy(oneapi::math::uplo uplo, int64_t n, int64_t nrhs, + const std::vector& B, int64_t ldb, std::vector A_initial, + int64_t lda, std::vector B_initial) { using fp_real = typename complex_info::real_type; hermitian_to_full(uplo, n, A_initial, lda); // Compute A*X - B. Store result in B_initial - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, nrhs, n, - -1.0, A_initial.data(), lda, B.data(), ldb, 1.0, B_initial.data(), ldb); + reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, n, nrhs, + n, -1.0, A_initial.data(), lda, B.data(), ldb, 1.0, B_initial.data(), ldb); // Compute norm residual |A*X - B| fp_real norm_residual = reference::lange('1', n, nrhs, B_initial.data(), ldb); diff --git a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp index 37fe92dad..9e28e9feb 100644 --- a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp +++ b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp @@ -62,7 +62,7 @@ inline CBLAS_DIAG cblas_diag(oneapi::math::diag d) { return CblasUnit; return CblasNonUnit; } -inline CBLAS_SIDE cblas_side(const char *c) { +inline CBLAS_SIDE cblas_side(const char* c) { return *c == 'R' || *c == 'r' ? 
CblasRight : CblasLeft; } inline CBLAS_SIDE cblas_side(oneapi::math::side s) { @@ -149,143 +149,143 @@ inline char to_char(oneapi::math::generate v) { return 'Q'; } -inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc) { +inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc) { cblas_sgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc) { +inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc) { cblas_dgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc) { - cblas_cgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void *)&alpha, - (void *)a, lda, (void *)(b), ldb, (void *)&beta, (void *)c, ldc); +inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, 
int64_t ldc) { + cblas_cgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha, + (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc); } -inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc) { - cblas_zgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void *)&alpha, - (void *)a, lda, (void *)(b), ldb, (void *)&beta, (void *)c, ldc); +inline void gemm(oneapi::math::transpose transa, oneapi::math::transpose transb, int64_t m, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc) { + cblas_zgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha, + (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc); } -inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, float *a, int64_t lda, - float *w) { +inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, float* a, int64_t lda, + float* w) { return LAPACKE_ssyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w); } -inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, double *a, int64_t lda, - double *w) { +inline int64_t syevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, double* a, int64_t lda, + double* w) { return LAPACKE_dsyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w); } -inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, float *a, - int64_t lda, float *b, int64_t ldb, float *w) { +inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, float* a, + int64_t lda, float* b, int64_t ldb, float* w) { return LAPACKE_ssygvd(LAPACK_COL_MAJOR, itype, 
to_char(j), to_char(u), n, a, lda, b, ldb, w); } -inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, double *a, - int64_t lda, double *b, int64_t ldb, double *w) { +inline int64_t sygvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, double* a, + int64_t lda, double* b, int64_t ldb, double* w) { return LAPACKE_dsygvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, a, lda, b, ldb, w); } inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, float beta, float *c, int64_t ldc) { + const float* a, int64_t lda, float beta, float* c, int64_t ldc) { cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } -inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, double beta, double *c, int64_t ldc) { +inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc) { cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc) { - cblas_csyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void *)&alpha, a, lda, - (void *)&beta, (void *)c, ldc); + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc) { + cblas_csyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda, + (void*)&beta, (void*)c, ldc); } inline void syrk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc) { - 
cblas_zsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void *)&alpha, a, lda, - (void *)&beta, (void *)c, ldc); + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc) { + cblas_zsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda, + (void*)&beta, (void*)c, ldc); } inline void herk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, float alpha, - const std::complex *a, int64_t lda, float beta, std::complex *c, + const std::complex* a, int64_t lda, float beta, std::complex* c, int64_t ldc) { - cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } -inline void herk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, double alpha, - const std::complex *a, int64_t lda, double beta, std::complex *c, - int64_t ldc) { - cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, +inline void herk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc) { + cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc) { + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc) { cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc) { 
cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc) { - cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc) { + cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void sy_he_rk(oneapi::math::uplo u, oneapi::math::transpose t, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc) { - cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc) { + cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa, - oneapi::math::diag diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb) { + oneapi::math::diag diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb) { cblas_strmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), cblas_diag(diag), m, n, alpha, a, lda, b, ldb); } inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa, - oneapi::math::diag diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, double *b, int64_t ldb) { + oneapi::math::diag diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, double* b, int64_t ldb) { cblas_dtrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), cblas_diag(diag), m, n, 
alpha, a, lda, b, ldb); } inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa, oneapi::math::diag diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { cblas_ctrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), - cblas_diag(diag), m, n, (void *)&alpha, (void *)(a), lda, (void *)(b), ldb); + cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb); } inline void trmm(oneapi::math::side side, oneapi::math::uplo uplo, oneapi::math::transpose transa, oneapi::math::diag diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { cblas_ztrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), - cblas_diag(diag), m, n, (void *)&alpha, (void *)(a), lda, (void *)(b), ldb); + cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb); } -inline void swap(int64_t n, float *X, int64_t incX, float *Y, int64_t incY) { +inline void swap(int64_t n, float* X, int64_t incX, float* Y, int64_t incY) { cblas_sswap(n, X, incX, Y, incY); } -inline void swap(int64_t n, double *X, int64_t incX, double *Y, int64_t incY) { +inline void swap(int64_t n, double* X, int64_t incX, double* Y, int64_t incY) { cblas_dswap(n, X, incX, Y, incY); } -inline void swap(int64_t n, std::complex *X, int64_t incX, std::complex *Y, +inline void swap(int64_t n, std::complex* X, int64_t incX, std::complex* Y, int64_t incY) { - cblas_cswap(n, (void *)X, incX, (void *)Y, incY); + cblas_cswap(n, (void*)X, incX, (void*)Y, incY); } -inline void swap(int64_t n, std::complex *X, int64_t incX, std::complex *Y, +inline void swap(int64_t n, std::complex* X, int64_t incX, std::complex* Y, int64_t incY) { - cblas_zswap(n, (void *)X, incX, (void 
*)Y, incY); + cblas_zswap(n, (void*)X, incX, (void*)Y, incY); } template @@ -299,608 +299,611 @@ inline double lamch(char cmach) { return LAPACKE_dlamch(cmach); } -inline float lange(char norm, int64_t m, int64_t n, const std::complex *a, int64_t lda) { +inline float lange(char norm, int64_t m, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_clange(LAPACK_COL_MAJOR, norm, m, n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lange(char norm, int64_t m, int64_t n, const double *a, int64_t lda) { +inline double lange(char norm, int64_t m, int64_t n, const double* a, int64_t lda) { return LAPACKE_dlange(LAPACK_COL_MAJOR, norm, m, n, a, lda); } -inline float lange(char norm, int64_t m, int64_t n, const float *a, int64_t lda) { +inline float lange(char norm, int64_t m, int64_t n, const float* a, int64_t lda) { return LAPACKE_slange(LAPACK_COL_MAJOR, norm, m, n, a, lda); } -inline double lange(char norm, int64_t m, int64_t n, const std::complex *a, int64_t lda) { +inline double lange(char norm, int64_t m, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlange(LAPACK_COL_MAJOR, norm, m, n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline float lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex *a, +inline float lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_clanhe(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex *a, +inline double lanhe(char norm, oneapi::math::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlanhe(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const std::complex *a, +inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const 
std::complex* a, int64_t lda) { return LAPACKE_clansy(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const double *a, int64_t lda) { +inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const double* a, int64_t lda) { return LAPACKE_dlansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda); } -inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const float *a, int64_t lda) { +inline float lansy(char norm, oneapi::math::uplo u, int64_t n, const float* a, int64_t lda) { return LAPACKE_slansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda); } -inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const std::complex *a, +inline double lansy(char norm, oneapi::math::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlansy(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_clacpy(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const double *a, int64_t lda, double *b, +inline int64_t lacpy(char u, int64_t m, int64_t n, const double* a, int64_t lda, double* b, int64_t ldb) { return LAPACKE_dlacpy(LAPACK_COL_MAJOR, u, m, n, a, lda, b, ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const float *a, int64_t lda, float *b, +inline int64_t lacpy(char u, int64_t m, int64_t n, const float* a, int64_t lda, float* b, int64_t ldb) { return LAPACKE_slacpy(LAPACK_COL_MAJOR, u, m, n, a, lda, b, ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const 
std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_zlacpy(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex *a, - int64_t lda, std::complex *b, int64_t ldb) { +inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex* a, + int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_clacpy(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const double *a, int64_t lda, - double *b, int64_t ldb) { +inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const double* a, int64_t lda, + double* b, int64_t ldb) { return LAPACKE_dlacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb); } -inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const float *a, int64_t lda, - float *b, int64_t ldb) { +inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const float* a, int64_t lda, + float* b, int64_t ldb) { return LAPACKE_slacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb); } -inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex *a, - int64_t lda, std::complex *b, int64_t ldb) { +inline int64_t lacpy(oneapi::math::uplo u, int64_t m, int64_t n, const std::complex* a, + int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_zlacpy(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, 
std::complex *a, int64_t lda) { + std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_claset(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, double alpha, double beta, - double *a, int64_t lda) { + double* a, int64_t lda) { return LAPACKE_dlaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda); } -inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, float alpha, float beta, float *a, +inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, float alpha, float beta, float* a, int64_t lda) { return LAPACKE_slaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda); } inline int64_t laset(oneapi::math::uplo u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { + std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_zlaset(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } inline int64_t laset(char u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { - return LAPACKE_claset(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + std::complex beta, std::complex* a, int64_t lda) { + return LAPACKE_claset(LAPACK_COL_MAJOR, u, m, n, reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } -inline int64_t laset(char u, int64_t m, int64_t n, double alpha, double beta, double *a, +inline int64_t laset(char u, int64_t m, int64_t n, double alpha, double beta, double* a, int64_t lda) { return LAPACKE_dlaset(LAPACK_COL_MAJOR, u, m, n, alpha, beta, a, lda); } -inline int64_t laset(char u, 
int64_t m, int64_t n, float alpha, float beta, float *a, int64_t lda) { +inline int64_t laset(char u, int64_t m, int64_t n, float alpha, float beta, float* a, int64_t lda) { return LAPACKE_slaset(LAPACK_COL_MAJOR, u, m, n, alpha, beta, a, lda); } inline int64_t laset(char u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { + std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_zlaset(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } -inline int64_t gebrd(int64_t m, int64_t n, std::complex *a, int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup) { - return LAPACKE_cgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - d, e, reinterpret_cast(tauq), - reinterpret_cast(taup)); +inline int64_t gebrd(int64_t m, int64_t n, std::complex* a, int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup) { + return LAPACKE_cgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + d, e, reinterpret_cast(tauq), + reinterpret_cast(taup)); } -inline int64_t gebrd(int64_t m, int64_t n, double *a, int64_t lda, double *d, double *e, - double *tauq, double *taup) { +inline int64_t gebrd(int64_t m, int64_t n, double* a, int64_t lda, double* d, double* e, + double* tauq, double* taup) { return LAPACKE_dgebrd(LAPACK_COL_MAJOR, m, n, a, lda, d, e, tauq, taup); } -inline int64_t gebrd(int64_t m, int64_t n, float *a, int64_t lda, float *d, float *e, float *tauq, - float *taup) { +inline int64_t gebrd(int64_t m, int64_t n, float* a, int64_t lda, float* d, float* e, float* tauq, + float* taup) { return LAPACKE_sgebrd(LAPACK_COL_MAJOR, m, n, a, lda, d, e, tauq, taup); } -inline int64_t gebrd(int64_t m, int64_t n, std::complex *a, int64_t lda, double *d, - double *e, std::complex *tauq, std::complex *taup) { - return LAPACKE_zgebrd(LAPACK_COL_MAJOR, m, n, 
reinterpret_cast(a), lda, - d, e, reinterpret_cast(tauq), - reinterpret_cast(taup)); +inline int64_t gebrd(int64_t m, int64_t n, std::complex* a, int64_t lda, double* d, + double* e, std::complex* tauq, std::complex* taup) { + return LAPACKE_zgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + d, e, reinterpret_cast(tauq), + reinterpret_cast(taup)); } -inline int64_t geqrf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_cgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t geqrf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_cgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t geqrf(int64_t m, int64_t n, double *a, int64_t lda, double *tau) { +inline int64_t geqrf(int64_t m, int64_t n, double* a, int64_t lda, double* tau) { return LAPACKE_dgeqrf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t geqrf(int64_t m, int64_t n, float *a, int64_t lda, float *tau) { +inline int64_t geqrf(int64_t m, int64_t n, float* a, int64_t lda, float* tau) { return LAPACKE_sgeqrf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t geqrf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_zgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t geqrf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_zgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t gerqf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_cgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t gerqf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_cgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } 
-inline int64_t gerqf(int64_t m, int64_t n, double *a, int64_t lda, double *tau) { +inline int64_t gerqf(int64_t m, int64_t n, double* a, int64_t lda, double* tau) { return LAPACKE_dgerqf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t gerqf(int64_t m, int64_t n, float *a, int64_t lda, float *tau) { +inline int64_t gerqf(int64_t m, int64_t n, float* a, int64_t lda, float* tau) { return LAPACKE_sgerqf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t gerqf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_zgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t gerqf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_zgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n, - std::complex *a, int64_t lda, float *s, std::complex *u, - int64_t ldu, std::complex *vt, int64_t ldvt, float *superb) { + std::complex* a, int64_t lda, float* s, std::complex* u, + int64_t ldu, std::complex* vt, int64_t ldvt, float* superb) { return LAPACKE_cgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, - reinterpret_cast(a), lda, s, - reinterpret_cast(u), ldu, - reinterpret_cast(vt), ldvt, superb); + reinterpret_cast(a), lda, s, + reinterpret_cast(u), ldu, + reinterpret_cast(vt), ldvt, superb); } inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n, - double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, - int64_t ldvt, double *superb) { + double* a, int64_t lda, double* s, double* u, int64_t ldu, double* vt, + int64_t ldvt, double* superb) { return LAPACKE_dgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, superb); } inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n, - 
float *a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, - int64_t ldvt, float *superb) { + float* a, int64_t lda, float* s, float* u, int64_t ldu, float* vt, + int64_t ldvt, float* superb) { return LAPACKE_sgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, superb); } inline int64_t gesvd(oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, int64_t m, int64_t n, - std::complex *a, int64_t lda, double *s, std::complex *u, - int64_t ldu, std::complex *vt, int64_t ldvt, double *superb) { + std::complex* a, int64_t lda, double* s, std::complex* u, + int64_t ldu, std::complex* vt, int64_t ldvt, double* superb) { return LAPACKE_zgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, - reinterpret_cast(a), lda, s, - reinterpret_cast(u), ldu, - reinterpret_cast(vt), ldvt, superb); + reinterpret_cast(a), lda, s, + reinterpret_cast(u), ldu, + reinterpret_cast(vt), ldvt, superb); } -inline int64_t getrf(int64_t m, int64_t n, std::complex *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, std::complex* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t getrf(int64_t m, int64_t n, double *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_dgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, double* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_dgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); } -inline int64_t getrf(int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_sgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, float* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_sgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); } 
-inline int64_t getrf(int64_t m, int64_t n, std::complex *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, std::complex* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex *a, - int64_t lda, float *w) { +inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex* a, + int64_t lda, float* w) { return LAPACKE_cheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, w); + reinterpret_cast(a), lda, w); } -inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex *a, - int64_t lda, double *w) { +inline int64_t heevd(oneapi::math::job j, oneapi::math::uplo u, int64_t n, std::complex* a, + int64_t lda, double* w) { return LAPACKE_zheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, w); + reinterpret_cast(a), lda, w); } inline int64_t hegvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, - std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - float *w) { + std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + float* w) { return LAPACKE_chegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, w); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, w); } inline int64_t hegvd(int64_t itype, oneapi::math::job j, oneapi::math::uplo u, int64_t n, - std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - double *w) { + std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + double* w) { return LAPACKE_zhegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, w); + reinterpret_cast(a), lda, + 
reinterpret_cast(b), ldb, w); } -inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, float *d, - float *e, std::complex *tau) { +inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, float* d, + float* e, std::complex* tau) { return LAPACKE_chetrd(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, d, e, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, d, e, + reinterpret_cast(tau)); } -inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - double *d, double *e, std::complex *tau) { +inline int64_t hetrd(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + double* d, double* e, std::complex* tau) { return LAPACKE_zhetrd(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, d, e, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, d, e, + reinterpret_cast(tau)); } -inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_chetrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t hetrf(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_zhetrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - const std::complex *tau) { +inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + const std::complex* tau) { return LAPACKE_cungtr(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + 
reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - const std::complex *tau) { +inline int64_t ungtr(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + const std::complex* tau) { return LAPACKE_zungtr(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline int64_t unmtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t unmtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t orgtr(oneapi::math::uplo u, int64_t n, double *a, int64_t lda, const double *tau) { +inline int64_t orgtr(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, const double* tau) { return LAPACKE_dorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau); } -inline int64_t orgtr(oneapi::math::uplo u, int64_t n, float *a, int64_t lda, const float *tau) { 
+inline int64_t orgtr(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau); } inline int64_t ormtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, float *a, int64_t lda, const float *tau, float *c, + int64_t m, int64_t n, float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } inline int64_t ormtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, double *a, int64_t lda, const double *tau, double *c, + int64_t m, int64_t n, double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } -inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, float *a, int64_t lda, const float *tau, float *c, - int64_t ldc) { +inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, + oneapi::math::transpose trans, int64_t m, int64_t n, float* a, int64_t lda, + const float* tau, float* c, int64_t ldc) { return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } -inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, double *a, int64_t lda, const double *tau, double *c, - int64_t ldc) { +inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, + oneapi::math::transpose trans, int64_t m, int64_t n, double* a, + int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } -inline int64_t or_un_mtr(oneapi::math::side side, 
oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau, std::complex *c, int64_t ldc) { +inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, + oneapi::math::transpose trans, int64_t m, int64_t n, + std::complex* a, int64_t lda, std::complex* tau, + std::complex* c, int64_t ldc) { return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); -} -inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, oneapi::math::transpose trans, - int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau, std::complex *c, int64_t ldc) { + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); +} +inline int64_t or_un_mtr(oneapi::math::side side, oneapi::math::uplo u, + oneapi::math::transpose trans, int64_t m, int64_t n, + std::complex* a, int64_t lda, std::complex* tau, + std::complex* c, int64_t ldc) { return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t sytrd(oneapi::math::uplo u, int64_t n, float *a, int64_t lda, float *d, float *e, - float *tau) { +inline int64_t sytrd(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, float* d, float* e, + float* tau) { return LAPACKE_ssytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau); } -inline int64_t sytrd(oneapi::math::uplo u, int64_t n, double *a, int64_t lda, double *d, double *e, - double *tau) { +inline int64_t sytrd(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, double* d, double* e, + double* tau) { return LAPACKE_dsytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau); } -inline int64_t sytrf(oneapi::math::uplo u, int64_t n, float *a, 
int64_t lda, int64_t *ipiv) { +inline int64_t sytrf(oneapi::math::uplo u, int64_t n, float* a, int64_t lda, int64_t* ipiv) { return LAPACKE_ssytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda, - reinterpret_cast(ipiv)); + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::math::uplo u, int64_t n, double *a, int64_t lda, int64_t *ipiv) { +inline int64_t sytrf(oneapi::math::uplo u, int64_t n, double* a, int64_t lda, int64_t* ipiv) { return LAPACKE_dsytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda, - reinterpret_cast(ipiv)); + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_csytrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t sytrf(oneapi::math::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_zsytrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, double *a, - int64_t lda, const double *tau) { +inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, double* a, + int64_t lda, const double* tau) { LAPACKE_dorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau); } -inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, float *a, - int64_t lda, const float *tau) { +inline void orgbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, float* a, + int64_t lda, const float* tau) { LAPACKE_sorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, 
int64_t n, int64_t k, float *a, int64_t lda, const float *tau) { +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, double *a, int64_t lda, - const double *tau) { +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, double* a, int64_t lda, + const double* tau) { return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const float *a, int64_t lda, const float *tau, float *c, + int64_t k, const float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return LAPACKE_sormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const double *a, int64_t lda, const double *tau, double *c, + int64_t k, const double* a, int64_t lda, const double* tau, double* c, int64_t ldc) 
{ return LAPACKE_dormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t or_un_mqr(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, float *a, int64_t lda, const float *tau) { +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgrq(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, double *a, int64_t lda, - const double *tau) { +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, double* a, int64_t lda, + const double* tau) { return LAPACKE_dorgrq(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_cungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t 
or_un_grq(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_cungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_zungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_zungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const float *a, int64_t lda, const float *tau, float *c, + int64_t k, const float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return LAPACKE_sormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const double *a, int64_t lda, const double *tau, double *c, + int64_t k, const double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t or_un_mrq(oneapi::math::side s, oneapi::math::transpose t, int64_t m, int64_t n, 
- int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_cpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, double *a, int64_t lda) { +inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, double* a, int64_t lda) { return LAPACKE_dpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, float *a, int64_t lda) { +inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, float* a, int64_t lda) { return LAPACKE_spotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potrf(oneapi::math::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_zpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, - const std::complex *a, int64_t lda, std::complex *b, + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_cpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t 
potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const double *a, - int64_t lda, double *b, int64_t ldb) { +inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const double* a, + int64_t lda, double* b, int64_t ldb) { return LAPACKE_dpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb); } -inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const float *a, - int64_t lda, float *b, int64_t ldb) { +inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, const float* a, + int64_t lda, float* b, int64_t ldb) { return LAPACKE_spotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb); } inline int64_t potrs(oneapi::math::uplo upper_lower, int64_t n, int64_t nrhs, - const std::complex *a, int64_t lda, std::complex *b, + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_zpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_cpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, double *a, int64_t lda) { +inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, double* a, int64_t lda) { return LAPACKE_dpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, float *a, int64_t lda) { +inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, float* a, int64_t lda) { return LAPACKE_spotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex *a, 
+inline int64_t potri(oneapi::math::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_zpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t laswp(int64_t n, std::complex *a, int64_t lda, int64_t k1, int64_t k2, - const int64_t *ipiv, int64_t incx) { - return LAPACKE_claswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, - k2, reinterpret_cast(ipiv), incx); +inline int64_t laswp(int64_t n, std::complex* a, int64_t lda, int64_t k1, int64_t k2, + const int64_t* ipiv, int64_t incx) { + return LAPACKE_claswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, + k2, reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, double *a, int64_t lda, int64_t k1, int64_t k2, const int64_t *ipiv, +inline int64_t laswp(int64_t n, double* a, int64_t lda, int64_t k1, int64_t k2, const int64_t* ipiv, int64_t incx) { return LAPACKE_dlaswp(LAPACK_COL_MAJOR, n, a, lda, k1, k2, - reinterpret_cast(ipiv), incx); + reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, float *a, int64_t lda, int64_t k1, int64_t k2, const int64_t *ipiv, +inline int64_t laswp(int64_t n, float* a, int64_t lda, int64_t k1, int64_t k2, const int64_t* ipiv, int64_t incx) { return LAPACKE_slaswp(LAPACK_COL_MAJOR, n, a, lda, k1, k2, - reinterpret_cast(ipiv), incx); + reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, std::complex *a, int64_t lda, int64_t k1, int64_t k2, - const int64_t *ipiv, int64_t incx) { - return LAPACKE_zlaswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, - k1, k2, reinterpret_cast(ipiv), incx); +inline int64_t laswp(int64_t n, std::complex* a, int64_t lda, int64_t k1, int64_t k2, + const int64_t* ipiv, int64_t incx) { + return LAPACKE_zlaswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, + k2, reinterpret_cast(ipiv), incx); } inline void ungbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, - std::complex *a, int64_t lda, const 
std::complex *tau) { + std::complex* a, int64_t lda, const std::complex* tau) { LAPACKE_cungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline void ungbr(oneapi::math::generate vect, int64_t m, int64_t n, int64_t k, - std::complex *a, int64_t lda, const std::complex *tau) { + std::complex* a, int64_t lda, const std::complex* tau) { LAPACKE_zungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - int64_t n, int64_t nrhs, const float *a, int64_t lda, float *b, int64_t ldb) { +inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, int64_t n, int64_t nrhs, const float* a, int64_t lda, + float* b, int64_t ldb) { return LAPACKE_strtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, a, lda, b, ldb); } -inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - int64_t n, int64_t nrhs, const double *a, int64_t lda, double *b, - int64_t ldb) { +inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, int64_t n, int64_t nrhs, const double* a, int64_t lda, + double* b, int64_t ldb) { return LAPACKE_dtrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, a, lda, b, ldb); } -inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - int64_t n, int64_t nrhs, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, int64_t n, int64_t nrhs, const std::complex* a, + int64_t lda, std::complex* b, int64_t ldb) { return 
LAPACKE_ctrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, oneapi::math::diag diag, - int64_t n, int64_t nrhs, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t trtrs(oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, int64_t n, int64_t nrhs, + const std::complex* a, int64_t lda, std::complex* b, + int64_t ldb) { return LAPACKE_ztrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } } //namespace reference diff --git a/tests/unit_tests/lapack/source/gebrd.cpp b/tests/unit_tests/lapack/source/gebrd.cpp index 650a73145..2bee82853 100644 --- a/tests/unit_tests/lapack/source/gebrd.cpp +++ b/tests/unit_tests/lapack/source/gebrd.cpp @@ -83,10 +83,10 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64 #ifdef CALL_RT_API oneapi::math::lapack::gebrd(queue, m, n, A_dev, lda, d_dev, e_dev, tauq_dev, taup_dev, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else - TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gebrd, m, n, A_dev, lda, d_dev, e_dev, - tauq_dev, taup_dev, scratchpad_dev, scratchpad_size); + TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gebrd, m, n, A_dev, lda, d_dev, + e_dev, tauq_dev, taup_dev, scratchpad_dev, scratchpad_size); #endif queue.wait_and_throw(); diff --git a/tests/unit_tests/lapack/source/geqrf.cpp b/tests/unit_tests/lapack/source/geqrf.cpp index 570fb835e..231be4b5d 100644 --- a/tests/unit_tests/lapack/source/geqrf.cpp +++ b/tests/unit_tests/lapack/source/geqrf.cpp @@ -75,7 +75,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t 
lda, uint64 #ifdef CALL_RT_API oneapi::math::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf, m, n, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -135,7 +135,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::geqrf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf, m, n, A_dev, lda, diff --git a/tests/unit_tests/lapack/source/geqrf_batch_group.cpp b/tests/unit_tests/lapack/source/geqrf_batch_group.cpp index 5f6f93b7c..c94ddd000 100644 --- a/tests/unit_tests/lapack/source/geqrf_batch_group.cpp +++ b/tests/unit_tests/lapack/source/geqrf_batch_group.cpp @@ -121,8 +121,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::geqrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs, - lda_vec.data(), tau_dev_ptrs, group_count, - group_sizes_vec.data(), scratchpad_dev, scratchpad_size); + lda_vec.data(), tau_dev_ptrs, group_count, + group_sizes_vec.data(), scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf_batch, m_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, diff --git a/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp b/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp index 227fdc30f..a3cdb55e3 100644 --- a/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/geqrf_batch_stride.cpp @@ -76,7 +76,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_ #ifdef CALL_RT_API oneapi::math::lapack::geqrf_batch(queue, m, n, A_dev, lda, stride_a, tau_dev, 
stride_tau, - batch_size, scratchpad_dev, scratchpad_size); + batch_size, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::geqrf_batch, m, n, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev, @@ -155,9 +155,10 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf_batch, m, n, A_dev, - lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::geqrf_batch, m, n, + A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, + scratchpad_dev, scratchpad_size, + std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/gerqf.cpp b/tests/unit_tests/lapack/source/gerqf.cpp index 091aa5d68..bb6693598 100644 --- a/tests/unit_tests/lapack/source/gerqf.cpp +++ b/tests/unit_tests/lapack/source/gerqf.cpp @@ -75,7 +75,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64 #ifdef CALL_RT_API oneapi::math::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gerqf, m, n, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -135,7 +135,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::gerqf(queue, m, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gerqf, m, n, A_dev, lda, diff --git a/tests/unit_tests/lapack/source/gesvd.cpp 
b/tests/unit_tests/lapack/source/gesvd.cpp index 2d793c1e8..afca7850b 100644 --- a/tests/unit_tests/lapack/source/gesvd.cpp +++ b/tests/unit_tests/lapack/source/gesvd.cpp @@ -86,7 +86,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi::math:: #ifdef CALL_RT_API oneapi::math::lapack::gesvd(queue, jobu, jobvt, m, n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev, - ldvt, scratchpad_dev, scratchpad_size); + ldvt, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::gesvd, jobu, jobvt, m, n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev, scratchpad_size); @@ -116,8 +116,8 @@ bool accuracy(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi::math:: US[row + col * ldus] = U[row + col * ldu] * s[col]; std::vector USV(m * n); int64_t ldusv = m; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, m, n, n, - 1.0, US.data(), ldus, Vt.data(), ldvt, 0.0, USV.data(), ldusv); + reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, m, n, + n, 1.0, US.data(), ldus, Vt.data(), ldvt, 0.0, USV.data(), ldusv); if (!rel_mat_err_check(m, n, A_initial, lda, USV, ldusv)) { test_log::lout << "Factorization check failed" << std::endl; result = false; @@ -134,8 +134,8 @@ bool accuracy(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi::math:: /* |I - U' U| < n O(eps) */ std::vector UU(ucols * ucols); int64_t lduu = ucols; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, ucols, - ucols, m, 1.0, U.data(), ldu, U.data(), ldu, 0.0, UU.data(), lduu); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + ucols, ucols, m, 1.0, U.data(), ldu, U.data(), ldu, 0.0, UU.data(), lduu); if (!rel_id_err_check(ucols, UU, lduu)) { test_log::lout << "U Orthogonality check failed" << std::endl; result = false; @@ -147,8 +147,9 @@ bool accuracy(const sycl::device& dev, 
oneapi::math::jobsvd jobu, oneapi::math:: /* |I - V' V| < n O(eps) */ std::vector VV(vtrows * vtrows); int64_t ldvv = vtrows; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, vtrows, - vtrows, n, 1.0, Vt.data(), ldvt, Vt.data(), ldvt, 0.0, VV.data(), ldvv); + reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::conjtrans, + vtrows, vtrows, n, 1.0, Vt.data(), ldvt, Vt.data(), ldvt, 0.0, VV.data(), + ldvv); if (!rel_id_err_check(vtrows, VV, ldvv)) { test_log::lout << "V Orthogonality check failed" << std::endl; result = false; @@ -214,8 +215,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::jobsvd jobu, oneapi:: scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gesvd, jobu, jobvt, m, n, - A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::gesvd, jobu, jobvt, m, + n, A_dev, lda, s_dev, U_dev, ldu, Vt_dev, ldvt, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/getrf.cpp b/tests/unit_tests/lapack/source/getrf.cpp index 165c7fed5..ca84f6680 100644 --- a/tests/unit_tests/lapack/source/getrf.cpp +++ b/tests/unit_tests/lapack/source/getrf.cpp @@ -78,7 +78,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, uint64 #ifdef CALL_RT_API oneapi::math::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf, m, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size); @@ -138,7 +138,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::getrf(queue, m, n, A_dev, lda, ipiv_dev, 
scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrf, m, n, A_dev, lda, diff --git a/tests/unit_tests/lapack/source/getrf_batch_group.cpp b/tests/unit_tests/lapack/source/getrf_batch_group.cpp index 0641698df..329f3d767 100644 --- a/tests/unit_tests/lapack/source/getrf_batch_group.cpp +++ b/tests/unit_tests/lapack/source/getrf_batch_group.cpp @@ -125,8 +125,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::getrf_batch(queue, m_vec.data(), n_vec.data(), A_dev_ptrs, - lda_vec.data(), ipiv_dev_ptrs, group_count, - group_sizes_vec.data(), scratchpad_dev, scratchpad_size); + lda_vec.data(), ipiv_dev_ptrs, group_count, + group_sizes_vec.data(), scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf_batch, m_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, diff --git a/tests/unit_tests/lapack/source/getrf_batch_stride.cpp b/tests/unit_tests/lapack/source/getrf_batch_stride.cpp index 62c55d5fb..0ba2d1714 100644 --- a/tests/unit_tests/lapack/source/getrf_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/getrf_batch_stride.cpp @@ -76,7 +76,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, int64_ #ifdef CALL_RT_API oneapi::math::lapack::getrf_batch(queue, m, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, - batch_size, scratchpad_dev, scratchpad_size); + batch_size, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrf_batch, m, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev, @@ -155,9 +155,10 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t lda, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = 
oneapi::math::lapack::getrf_batch, m, n, A_dev, - lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getrf_batch, m, n, + A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size, + scratchpad_dev, scratchpad_size, + std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/getri.cpp b/tests/unit_tests/lapack/source/getri.cpp index 24d272520..26b7c7307 100644 --- a/tests/unit_tests/lapack/source/getri.cpp +++ b/tests/unit_tests/lapack/source/getri.cpp @@ -83,7 +83,8 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, uint64_t seed) { queue.wait_and_throw(); #ifdef CALL_RT_API - oneapi::math::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size); + oneapi::math::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev, + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size); @@ -149,7 +150,7 @@ bool usm_dependency(const sycl::device& dev, int64_t n, int64_t lda, uint64_t se #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::getri(queue, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::getri, n, A_dev, lda, diff --git a/tests/unit_tests/lapack/source/getri_batch_group.cpp b/tests/unit_tests/lapack/source/getri_batch_group.cpp index d700a37f4..d13b40442 100644 --- a/tests/unit_tests/lapack/source/getri_batch_group.cpp +++ b/tests/unit_tests/lapack/source/getri_batch_group.cpp @@ -135,11 +135,11 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::getri_batch(queue, n_vec.data(), A_dev_ptrs, 
lda_vec.data(), - ipiv_dev_ptrs, group_count, group_sizes_vec.data(), - scratchpad_dev, scratchpad_size); + ipiv_dev_ptrs, group_count, group_sizes_vec.data(), + scratchpad_dev, scratchpad_size); #else - TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri_batch, n_vec.data(), A_dev_ptrs, - lda_vec.data(), ipiv_dev_ptrs, group_count, + TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri_batch, n_vec.data(), + A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, group_count, group_sizes_vec.data(), scratchpad_dev, scratchpad_size); #endif queue.wait_and_throw(); diff --git a/tests/unit_tests/lapack/source/getri_batch_stride.cpp b/tests/unit_tests/lapack/source/getri_batch_stride.cpp index aee87f7b9..858847f5d 100644 --- a/tests/unit_tests/lapack/source/getri_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/getri_batch_stride.cpp @@ -84,7 +84,7 @@ bool accuracy(const sycl::device& dev, int64_t n, int64_t lda, int64_t stride_a, #ifdef CALL_RT_API oneapi::math::lapack::getri_batch(queue, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, - batch_size, scratchpad_dev, scratchpad_size); + batch_size, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getri_batch, n, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, batch_size, scratchpad_dev, diff --git a/tests/unit_tests/lapack/source/getrs.cpp b/tests/unit_tests/lapack/source/getrs.cpp index 4532e3dd9..f43abb427 100644 --- a/tests/unit_tests/lapack/source/getrs.cpp +++ b/tests/unit_tests/lapack/source/getrs.cpp @@ -86,7 +86,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, #ifdef CALL_RT_API oneapi::math::lapack::getrs(queue, trans, n, nrhs, A_dev, lda, ipiv_dev, B_dev, ldb, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs, trans, n, nrhs, A_dev, lda, ipiv_dev, B_dev, ldb, scratchpad_dev, scratchpad_size); diff --git 
a/tests/unit_tests/lapack/source/getrs_batch_group.cpp b/tests/unit_tests/lapack/source/getrs_batch_group.cpp index d2cd80e9c..64c88bcd6 100644 --- a/tests/unit_tests/lapack/source/getrs_batch_group.cpp +++ b/tests/unit_tests/lapack/source/getrs_batch_group.cpp @@ -47,7 +47,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { /* Test Parameters */ std::vector trans_vec = { oneapi::math::transpose::nontrans, - oneapi::math::transpose::trans }; + oneapi::math::transpose::trans }; std::vector n_vec = { 4, 5 }; std::vector nrhs_vec = { 9, 6 }; std::vector lda_vec = { 6, 6 }; @@ -163,9 +163,9 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::getrs_batch(queue, trans_vec.data(), n_vec.data(), nrhs_vec.data(), - A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, B_dev_ptrs, - ldb_vec.data(), group_count, group_sizes_vec.data(), - scratchpad_dev, scratchpad_size); + A_dev_ptrs, lda_vec.data(), ipiv_dev_ptrs, B_dev_ptrs, + ldb_vec.data(), group_count, group_sizes_vec.data(), + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs_batch, trans_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(), diff --git a/tests/unit_tests/lapack/source/getrs_batch_stride.cpp b/tests/unit_tests/lapack/source/getrs_batch_stride.cpp index ff159416f..600067883 100644 --- a/tests/unit_tests/lapack/source/getrs_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/getrs_batch_stride.cpp @@ -79,8 +79,8 @@ bool accuracy(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, #else int64_t scratchpad_size; TEST_RUN_LAPACK_CT_SELECT( - queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size, trans, n, - nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); + queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size, trans, + n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); #endif auto scratchpad_dev = 
device_alloc(queue, scratchpad_size); @@ -91,8 +91,8 @@ bool accuracy(const sycl::device& dev, oneapi::math::transpose trans, int64_t n, #ifdef CALL_RT_API oneapi::math::lapack::getrs_batch(queue, trans, n, nrhs, A_dev, lda, stride_a, ipiv_dev, - stride_ipiv, B_dev, ldb, stride_b, batch_size, - scratchpad_dev, scratchpad_size); + stride_ipiv, B_dev, ldb, stride_b, batch_size, + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::getrs_batch, trans, n, nrhs, A_dev, lda, stride_a, ipiv_dev, stride_ipiv, B_dev, ldb, stride_b, @@ -168,8 +168,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::transpose trans, int6 #else int64_t scratchpad_size; TEST_RUN_LAPACK_CT_SELECT( - queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size, trans, n, - nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); + queue, scratchpad_size = oneapi::math::lapack::getrs_batch_scratchpad_size, trans, + n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); #endif auto scratchpad_dev = device_alloc(queue, scratchpad_size); diff --git a/tests/unit_tests/lapack/source/heevd.cpp b/tests/unit_tests/lapack/source/heevd.cpp index b2a4c906b..8eecbba2b 100644 --- a/tests/unit_tests/lapack/source/heevd.cpp +++ b/tests/unit_tests/lapack/source/heevd.cpp @@ -73,7 +73,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::upl #ifdef CALL_RT_API oneapi::math::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::heevd, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, scratchpad_size); @@ -134,7 +134,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::job jobz, oneapi::mat #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::heevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, 
std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::heevd, jobz, uplo, n, diff --git a/tests/unit_tests/lapack/source/hegvd.cpp b/tests/unit_tests/lapack/source/hegvd.cpp index a072cd410..55422330b 100644 --- a/tests/unit_tests/lapack/source/hegvd.cpp +++ b/tests/unit_tests/lapack/source/hegvd.cpp @@ -42,8 +42,8 @@ const char* accuracy_input = R"( )"; template -bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - int64_t n, int64_t lda, int64_t ldb, uint64_t seed) { +bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) { using fp = typename data_T_info::value_type; using fp_real = typename complex_info::real_type; @@ -80,7 +80,7 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on #ifdef CALL_RT_API oneapi::math::lapack::hegvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hegvd, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size); @@ -141,8 +141,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |I - Z' B Z| < n O(eps) */ std::vector ZBZ(n * n); int64_t ldzbz = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); if (!rel_id_err_check(n, ZBZ, ldzbz)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -175,8 +175,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on 
/* |I - Z' B Z| < n O(eps) */ std::vector ZBZ(n * n); int64_t ldzbz = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); if (!rel_id_err_check(n, ZBZ, ldzbz)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -213,8 +213,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |I - Z' B^-1 Z| = |I - Z' C| < n O(eps) */ std::vector ZhC(n * n); int64_t ldzhc = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc); if (!rel_id_err_check(n, ZhC, ldzhc)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -274,9 +274,9 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::math::job jo scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hegvd, itype, jobz, uplo, - n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size, - std::vector{ in_event }); + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hegvd, itype, jobz, + uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, + scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/hetrd.cpp b/tests/unit_tests/lapack/source/hetrd.cpp index 4a5bb0345..a5b078eeb 100644 --- a/tests/unit_tests/lapack/source/hetrd.cpp +++ b/tests/unit_tests/lapack/source/hetrd.cpp @@ -79,7 
+79,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::hetrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hetrd, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size); diff --git a/tests/unit_tests/lapack/source/hetrf.cpp b/tests/unit_tests/lapack/source/hetrf.cpp index f22384b2c..72c767b74 100644 --- a/tests/unit_tests/lapack/source/hetrf.cpp +++ b/tests/unit_tests/lapack/source/hetrf.cpp @@ -77,7 +77,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::hetrf, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size); @@ -117,9 +117,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; k -= 1; @@ -135,9 +135,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k - 1 + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, 
+ oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; D[k - 1 + (k - 1) * ldd] = A[k - 1 + (k - 1) * lda]; @@ -160,9 +160,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 0 * lda), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + (k)*ldd] = A[k + (k)*lda]; k += 1; @@ -178,9 +178,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 1 + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; D[k + 1 + (k + 1) * ldd] = A[k + 1 + (k + 1) * lda]; @@ -253,7 +253,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::hetrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::hetrf, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/orgbr.cpp b/tests/unit_tests/lapack/source/orgbr.cpp index 1ed2ab0fd..3f78a9653 100644 --- 
a/tests/unit_tests/lapack/source/orgbr.cpp +++ b/tests/unit_tests/lapack/source/orgbr.cpp @@ -94,7 +94,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::generate vect, int64_t m, i #ifdef CALL_RT_API oneapi::math::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgbr, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -172,7 +172,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::generate vect, int64_ #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::orgbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgbr, vect, m, n, k, diff --git a/tests/unit_tests/lapack/source/orgqr.cpp b/tests/unit_tests/lapack/source/orgqr.cpp index f8ddc4f51..8f1da86b8 100644 --- a/tests/unit_tests/lapack/source/orgqr.cpp +++ b/tests/unit_tests/lapack/source/orgqr.cpp @@ -82,7 +82,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t #ifdef CALL_RT_API oneapi::math::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -146,7 +146,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::orgqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgqr, m, n, k, A_dev, diff --git 
a/tests/unit_tests/lapack/source/orgqr_batch_group.cpp b/tests/unit_tests/lapack/source/orgqr_batch_group.cpp index 4f44c9cf0..172471725 100644 --- a/tests/unit_tests/lapack/source/orgqr_batch_group.cpp +++ b/tests/unit_tests/lapack/source/orgqr_batch_group.cpp @@ -131,8 +131,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::orgqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(), - A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count, - group_sizes_vec.data(), scratchpad_dev, scratchpad_size); + A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count, + group_sizes_vec.data(), scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr_batch, m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(), diff --git a/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp b/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp index c88b6e88c..8ac2b2dff 100644 --- a/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/orgqr_batch_stride.cpp @@ -83,7 +83,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t #ifdef CALL_RT_API oneapi::math::lapack::orgqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, - batch_size, scratchpad_dev, scratchpad_size); + batch_size, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgqr_batch, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev, diff --git a/tests/unit_tests/lapack/source/orgtr.cpp b/tests/unit_tests/lapack/source/orgtr.cpp index 4a909d04e..154402156 100644 --- a/tests/unit_tests/lapack/source/orgtr.cpp +++ b/tests/unit_tests/lapack/source/orgtr.cpp @@ -80,7 +80,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); 
+ scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::orgtr, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -147,7 +147,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::orgtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::orgtr, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/ormqr.cpp b/tests/unit_tests/lapack/source/ormqr.cpp index 5181d9a3b..1c27a5cd7 100644 --- a/tests/unit_tests/lapack/source/ormqr.cpp +++ b/tests/unit_tests/lapack/source/ormqr.cpp @@ -92,7 +92,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::ma #ifdef CALL_RT_API oneapi::math::lapack::ormqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, - ldc, scratchpad_dev, scratchpad_size); + ldc, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormqr, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); @@ -185,8 +185,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormqr, left_right, trans, - m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormqr, left_right, + trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/ormrq.cpp b/tests/unit_tests/lapack/source/ormrq.cpp index 375632336..f63dca6a8 100644 --- 
a/tests/unit_tests/lapack/source/ormrq.cpp +++ b/tests/unit_tests/lapack/source/ormrq.cpp @@ -102,7 +102,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::ma #ifdef CALL_RT_API oneapi::math::lapack::ormrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, - ldc, scratchpad_dev, scratchpad_size); + ldc, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormrq, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); @@ -194,8 +194,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormrq, left_right, trans, - m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormrq, left_right, + trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/ormtr.cpp b/tests/unit_tests/lapack/source/ormtr.cpp index fd37974ea..dde332b32 100644 --- a/tests/unit_tests/lapack/source/ormtr.cpp +++ b/tests/unit_tests/lapack/source/ormtr.cpp @@ -90,10 +90,10 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64 #ifdef CALL_RT_API oneapi::math::lapack::ormtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else - TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormtr, side, uplo, trans, m, n, A_dev, - lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); + TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ormtr, side, uplo, trans, m, n, + A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); #endif queue.wait_and_throw(); @@ 
-184,8 +184,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormtr, side, uplo, trans, - m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ormtr, side, uplo, + trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/potrf.cpp b/tests/unit_tests/lapack/source/potrf.cpp index 19063dfcf..2cd37de30 100644 --- a/tests/unit_tests/lapack/source/potrf.cpp +++ b/tests/unit_tests/lapack/source/potrf.cpp @@ -130,7 +130,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::potrf(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size, - std::vector{ in_event }); + std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrf, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/potrf_batch_group.cpp b/tests/unit_tests/lapack/source/potrf_batch_group.cpp index ce2fbc718..90ceaa5bf 100644 --- a/tests/unit_tests/lapack/source/potrf_batch_group.cpp +++ b/tests/unit_tests/lapack/source/potrf_batch_group.cpp @@ -47,7 +47,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { /* Test Parameters */ std::vector uplo_vec = { oneapi::math::uplo::upper, - oneapi::math::uplo::lower }; + oneapi::math::uplo::lower }; std::vector n_vec = { 4, 4 }; std::vector lda_vec = { 5, 5 }; std::vector group_sizes_vec = { 2, 2 }; @@ -114,8 +114,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::potrf_batch(queue, uplo_vec.data(), n_vec.data(), A_dev_ptrs, - 
lda_vec.data(), group_count, group_sizes_vec.data(), - scratchpad_dev, scratchpad_size); + lda_vec.data(), group_count, group_sizes_vec.data(), + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrf_batch, uplo_vec.data(), n_vec.data(), A_dev_ptrs, lda_vec.data(), group_count, diff --git a/tests/unit_tests/lapack/source/potrf_batch_stride.cpp b/tests/unit_tests/lapack/source/potrf_batch_stride.cpp index f893a0f56..15a5d8296 100644 --- a/tests/unit_tests/lapack/source/potrf_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/potrf_batch_stride.cpp @@ -73,7 +73,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::potrf_batch(queue, uplo, n, A_dev, lda, stride_a, batch_size, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrf_batch, uplo, n, A_dev, lda, stride_a, batch_size, scratchpad_dev, scratchpad_size); diff --git a/tests/unit_tests/lapack/source/potri.cpp b/tests/unit_tests/lapack/source/potri.cpp index 3f10bad09..a76958e4e 100644 --- a/tests/unit_tests/lapack/source/potri.cpp +++ b/tests/unit_tests/lapack/source/potri.cpp @@ -164,7 +164,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::potri(queue, uplo, n, A_dev, lda, scratchpad_dev, scratchpad_size, - std::vector{ in_event }); + std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potri, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/potrs.cpp b/tests/unit_tests/lapack/source/potrs.cpp index d3b8abf0c..7c7a9b07b 100644 --- a/tests/unit_tests/lapack/source/potrs.cpp +++ b/tests/unit_tests/lapack/source/potrs.cpp @@ -40,8 +40,8 @@ const char* accuracy_input = R"( )"; template -bool accuracy(const sycl::device& dev, 
oneapi::math::uplo uplo, int64_t n, int64_t nrhs, int64_t lda, - int64_t ldb, uint64_t seed) { +bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs, + int64_t lda, int64_t ldb, uint64_t seed) { using fp = typename data_T_info::value_type; using fp_real = typename complex_info::real_type; @@ -83,7 +83,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::potrs(queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size); @@ -151,9 +151,9 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, /* Check dependency handling */ auto in_event = create_dependency(queue); #ifdef CALL_RT_API - sycl::event func_event = - oneapi::math::lapack::potrs(queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + sycl::event func_event = oneapi::math::lapack::potrs( + queue, uplo, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size, + std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::potrs, uplo, n, nrhs, diff --git a/tests/unit_tests/lapack/source/potrs_batch_group.cpp b/tests/unit_tests/lapack/source/potrs_batch_group.cpp index 90359133d..f469e69c1 100644 --- a/tests/unit_tests/lapack/source/potrs_batch_group.cpp +++ b/tests/unit_tests/lapack/source/potrs_batch_group.cpp @@ -47,7 +47,7 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { /* Test Parameters */ std::vector uplo_vec = { oneapi::math::uplo::upper, - oneapi::math::uplo::lower }; + oneapi::math::uplo::lower }; std::vector n_vec = { 4, 5 }; std::vector nrhs_vec = { 9, 6 }; std::vector lda_vec = { 6, 6 }; @@ -144,9 +144,9 @@ bool accuracy(const sycl::device& dev, 
uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::potrs_batch(queue, uplo_vec.data(), n_vec.data(), nrhs_vec.data(), - A_dev_ptrs, lda_vec.data(), B_dev_ptrs, ldb_vec.data(), - group_count, group_sizes_vec.data(), scratchpad_dev, - scratchpad_size); + A_dev_ptrs, lda_vec.data(), B_dev_ptrs, ldb_vec.data(), + group_count, group_sizes_vec.data(), scratchpad_dev, + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs_batch, uplo_vec.data(), n_vec.data(), nrhs_vec.data(), A_dev_ptrs, lda_vec.data(), diff --git a/tests/unit_tests/lapack/source/potrs_batch_stride.cpp b/tests/unit_tests/lapack/source/potrs_batch_stride.cpp index 6affb7105..a46f2816b 100644 --- a/tests/unit_tests/lapack/source/potrs_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/potrs_batch_stride.cpp @@ -40,8 +40,9 @@ const char* accuracy_input = R"( )"; template -bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs, int64_t lda, - int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size, uint64_t seed) { +bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64_t nrhs, + int64_t lda, int64_t stride_a, int64_t ldb, int64_t stride_b, int64_t batch_size, + uint64_t seed) { using fp = typename data_T_info::value_type; using fp_real = typename complex_info::real_type; @@ -88,7 +89,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::potrs_batch(queue, uplo, n, nrhs, A_dev, lda, stride_a, B_dev, ldb, - stride_b, batch_size, scratchpad_dev, scratchpad_size); + stride_b, batch_size, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::potrs_batch, uplo, n, nrhs, A_dev, lda, stride_a, B_dev, ldb, stride_b, batch_size, scratchpad_dev, diff --git a/tests/unit_tests/lapack/source/syevd.cpp b/tests/unit_tests/lapack/source/syevd.cpp index c0de25b70..5e013aead 100644 --- 
a/tests/unit_tests/lapack/source/syevd.cpp +++ b/tests/unit_tests/lapack/source/syevd.cpp @@ -73,7 +73,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::job jobz, oneapi::math::upl #ifdef CALL_RT_API oneapi::math::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::syevd, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, scratchpad_size); @@ -134,7 +134,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::job jobz, oneapi::mat #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::syevd(queue, jobz, uplo, n, A_dev, lda, w_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::syevd, jobz, uplo, n, diff --git a/tests/unit_tests/lapack/source/sygvd.cpp b/tests/unit_tests/lapack/source/sygvd.cpp index ce671feb2..b1f73ada6 100644 --- a/tests/unit_tests/lapack/source/sygvd.cpp +++ b/tests/unit_tests/lapack/source/sygvd.cpp @@ -42,8 +42,8 @@ const char* accuracy_input = R"( )"; template -bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, - int64_t n, int64_t lda, int64_t ldb, uint64_t seed) { +bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, int64_t n, int64_t lda, int64_t ldb, uint64_t seed) { using fp = typename data_T_info::value_type; using fp_real = typename complex_info::real_type; @@ -80,7 +80,7 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on #ifdef CALL_RT_API oneapi::math::lapack::sygvd(queue, itype, jobz, uplo, n, A_dev, lda, B_dev, ldb, w_dev, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sygvd, itype, jobz, uplo, n, A_dev, lda, B_dev, 
ldb, w_dev, scratchpad_dev, scratchpad_size); @@ -107,9 +107,9 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |D_ref - D| < |D_ref| O(eps) */ std::vector D_ref(n); - auto info = - reference::sygvd(itype, oneapi::math::job::novec, uplo, n, std::vector(A_initial).data(), - lda, std::vector(B_initial).data(), ldb, D_ref.data()); + auto info = reference::sygvd(itype, oneapi::math::job::novec, uplo, n, + std::vector(A_initial).data(), lda, + std::vector(B_initial).data(), ldb, D_ref.data()); if (0 != info) { test_log::lout << "reference sygvd failed with info = " << info << std::endl; return false; @@ -146,8 +146,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |I - Z' B Z| < n O(eps) */ std::vector ZBZ(n * n); int64_t ldzbz = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); if (!rel_id_err_check(n, ZBZ, ldzbz)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -180,8 +180,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |I - Z' B Z| < n O(eps) */ std::vector ZBZ(n * n); int64_t ldzbz = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, BZ.data(), ldbz, 0.0, ZBZ.data(), ldzbz); if (!rel_id_err_check(n, ZBZ, ldzbz)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -218,8 +218,8 @@ bool accuracy(const sycl::device& dev, int64_t itype, oneapi::math::job jobz, on /* |I - Z' B^-1 Z| = |I - 
Z' C| < n O(eps) */ std::vector ZhC(n * n); int64_t ldzhc = n; - reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, n, - n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc); + reference::gemm(oneapi::math::transpose::conjtrans, oneapi::math::transpose::nontrans, + n, n, n, 1.0, Z.data(), ldz, C.data(), ldc, 0.0, ZhC.data(), ldzhc); if (!rel_id_err_check(n, ZhC, ldzhc)) { test_log::lout << "Orthogonality check failed" << std::endl; result = false; @@ -279,9 +279,9 @@ bool usm_dependency(const sycl::device& dev, int64_t itype, oneapi::math::job jo scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sygvd, itype, jobz, uplo, - n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, scratchpad_size, - std::vector{ in_event }); + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sygvd, itype, jobz, + uplo, n, A_dev, lda, B_dev, ldb, w_dev, scratchpad_dev, + scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/sytrd.cpp b/tests/unit_tests/lapack/source/sytrd.cpp index 3b804d84b..a9cd82d48 100644 --- a/tests/unit_tests/lapack/source/sytrd.cpp +++ b/tests/unit_tests/lapack/source/sytrd.cpp @@ -79,7 +79,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::sytrd(queue, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sytrd, uplo, n, A_dev, lda, d_dev, e_dev, tau_dev, scratchpad_dev, scratchpad_size); diff --git a/tests/unit_tests/lapack/source/sytrf.cpp b/tests/unit_tests/lapack/source/sytrf.cpp index e24fba32e..39d17a0d4 100644 --- a/tests/unit_tests/lapack/source/sytrf.cpp +++ b/tests/unit_tests/lapack/source/sytrf.cpp @@ -74,7 
+74,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::sytrf, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, scratchpad_size); @@ -114,9 +114,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; k -= 1; @@ -132,9 +132,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k - 1 + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; D[k - 1 + (k - 1) * ldd] = A[k - 1 + (k - 1) * lda]; @@ -157,9 +157,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 0 * lda), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + 
oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + (k)*ldd] = A[k + (k)*lda]; k += 1; @@ -175,9 +175,9 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 reference::swap(n, Uk.data() + (k + 1 + 0 * ldu), ldu, Uk.data() + (piv + 0 * ldu), ldu); auto U_temp = U; - reference::gemm(oneapi::math::transpose::nontrans, oneapi::math::transpose::nontrans, - n, n, n, 1.0, U_temp.data(), ldu, Uk.data(), ldu, 0.0, U.data(), - ldu); + reference::gemm(oneapi::math::transpose::nontrans, + oneapi::math::transpose::nontrans, n, n, n, 1.0, U_temp.data(), ldu, + Uk.data(), ldu, 0.0, U.data(), ldu); D[k + k * ldd] = A[k + k * lda]; D[k + 1 + (k + 1) * ldd] = A[k + 1 + (k + 1) * lda]; @@ -250,7 +250,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::sytrf(queue, uplo, n, A_dev, lda, ipiv_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::sytrf, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/trtrs.cpp b/tests/unit_tests/lapack/source/trtrs.cpp index 4857e3892..3f847028c 100644 --- a/tests/unit_tests/lapack/source/trtrs.cpp +++ b/tests/unit_tests/lapack/source/trtrs.cpp @@ -86,7 +86,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, oneapi::math::tr #ifdef CALL_RT_API oneapi::math::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, A_dev, lda, B_dev, ldb, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::trtrs, uplo, trans, diag, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size); @@ -155,9 +155,9 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, oneapi::ma scratchpad_size, std::vector{ 
in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::trtrs, uplo, trans, diag, - n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, scratchpad_size, - std::vector{ in_event }); + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::trtrs, uplo, trans, + diag, n, nrhs, A_dev, lda, B_dev, ldb, scratchpad_dev, + scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/ungbr.cpp b/tests/unit_tests/lapack/source/ungbr.cpp index 723429025..f73085664 100644 --- a/tests/unit_tests/lapack/source/ungbr.cpp +++ b/tests/unit_tests/lapack/source/ungbr.cpp @@ -94,7 +94,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::generate vect, int64_t m, i #ifdef CALL_RT_API oneapi::math::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungbr, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -172,7 +172,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::generate vect, int64_ #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::ungbr(queue, vect, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungbr, vect, m, n, k, diff --git a/tests/unit_tests/lapack/source/ungqr.cpp b/tests/unit_tests/lapack/source/ungqr.cpp index 63737914a..115b2e5e5 100644 --- a/tests/unit_tests/lapack/source/ungqr.cpp +++ b/tests/unit_tests/lapack/source/ungqr.cpp @@ -81,7 +81,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t #ifdef CALL_RT_API oneapi::math::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + 
scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -145,7 +145,7 @@ bool usm_dependency(const sycl::device& dev, int64_t m, int64_t n, int64_t k, in #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::ungqr(queue, m, n, k, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungqr, m, n, k, A_dev, diff --git a/tests/unit_tests/lapack/source/ungqr_batch_group.cpp b/tests/unit_tests/lapack/source/ungqr_batch_group.cpp index 0b188ce1c..9ed03c1cf 100644 --- a/tests/unit_tests/lapack/source/ungqr_batch_group.cpp +++ b/tests/unit_tests/lapack/source/ungqr_batch_group.cpp @@ -131,8 +131,8 @@ bool accuracy(const sycl::device& dev, uint64_t seed) { #ifdef CALL_RT_API oneapi::math::lapack::ungqr_batch(queue, m_vec.data(), n_vec.data(), k_vec.data(), - A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count, - group_sizes_vec.data(), scratchpad_dev, scratchpad_size); + A_dev_ptrs, lda_vec.data(), tau_dev_ptrs, group_count, + group_sizes_vec.data(), scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr_batch, m_vec.data(), n_vec.data(), k_vec.data(), A_dev_ptrs, lda_vec.data(), diff --git a/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp b/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp index 782648c8d..446bfd8f8 100644 --- a/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp +++ b/tests/unit_tests/lapack/source/ungqr_batch_stride.cpp @@ -83,7 +83,7 @@ bool accuracy(const sycl::device& dev, int64_t m, int64_t n, int64_t k, int64_t #ifdef CALL_RT_API oneapi::math::lapack::ungqr_batch(queue, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, - batch_size, scratchpad_dev, scratchpad_size); + batch_size, scratchpad_dev, scratchpad_size); 
#else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungqr_batch, m, n, k, A_dev, lda, stride_a, tau_dev, stride_tau, batch_size, scratchpad_dev, diff --git a/tests/unit_tests/lapack/source/ungtr.cpp b/tests/unit_tests/lapack/source/ungtr.cpp index 628ec95c0..ed516b03c 100644 --- a/tests/unit_tests/lapack/source/ungtr.cpp +++ b/tests/unit_tests/lapack/source/ungtr.cpp @@ -80,7 +80,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, int64 #ifdef CALL_RT_API oneapi::math::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size); + scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::ungtr, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, scratchpad_size); @@ -147,7 +147,7 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t n, #ifdef CALL_RT_API sycl::event func_event = oneapi::math::lapack::ungtr(queue, uplo, n, A_dev, lda, tau_dev, scratchpad_dev, - scratchpad_size, std::vector{ in_event }); + scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::ungtr, uplo, n, A_dev, diff --git a/tests/unit_tests/lapack/source/unmqr.cpp b/tests/unit_tests/lapack/source/unmqr.cpp index d613751e3..c0a147988 100644 --- a/tests/unit_tests/lapack/source/unmqr.cpp +++ b/tests/unit_tests/lapack/source/unmqr.cpp @@ -92,7 +92,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::ma #ifdef CALL_RT_API oneapi::math::lapack::unmqr(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, - ldc, scratchpad_dev, scratchpad_size); + ldc, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmqr, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); @@ -185,8 +185,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right, scratchpad_size, std::vector{ 
in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmqr, left_right, trans, - m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmqr, left_right, + trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/unmrq.cpp b/tests/unit_tests/lapack/source/unmrq.cpp index 41d99a389..b41d5eda3 100644 --- a/tests/unit_tests/lapack/source/unmrq.cpp +++ b/tests/unit_tests/lapack/source/unmrq.cpp @@ -102,7 +102,7 @@ bool accuracy(const sycl::device& dev, oneapi::math::side left_right, oneapi::ma #ifdef CALL_RT_API oneapi::math::lapack::unmrq(queue, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, - ldc, scratchpad_dev, scratchpad_size); + ldc, scratchpad_dev, scratchpad_size); #else TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmrq, left_right, trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); @@ -194,8 +194,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::side left_right, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmrq, left_right, trans, - m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmrq, left_right, + trans, m, n, k, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/lapack/source/unmtr.cpp b/tests/unit_tests/lapack/source/unmtr.cpp index 00761c469..e1f969204 100644 --- a/tests/unit_tests/lapack/source/unmtr.cpp +++ b/tests/unit_tests/lapack/source/unmtr.cpp @@ -90,10 +90,10 @@ bool accuracy(const 
sycl::device& dev, oneapi::math::uplo uplo, int64_t m, int64 #ifdef CALL_RT_API oneapi::math::lapack::unmtr(queue, side, uplo, trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, - scratchpad_dev, scratchpad_size); + scratchpad_dev, scratchpad_size); #else - TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmtr, side, uplo, trans, m, n, A_dev, - lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); + TEST_RUN_LAPACK_CT_SELECT(queue, oneapi::math::lapack::unmtr, side, uplo, trans, m, n, + A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size); #endif queue.wait_and_throw(); @@ -184,8 +184,8 @@ bool usm_dependency(const sycl::device& dev, oneapi::math::uplo uplo, int64_t m, scratchpad_size, std::vector{ in_event }); #else sycl::event func_event; - TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmtr, side, uplo, trans, - m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, + TEST_RUN_LAPACK_CT_SELECT(queue, func_event = oneapi::math::lapack::unmtr, side, uplo, + trans, m, n, A_dev, lda, tau_dev, C_dev, ldc, scratchpad_dev, scratchpad_size, std::vector{ in_event }); #endif result = check_dependency(queue, in_event, func_event); diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index 18e6b07a7..df812e4c8 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -114,8 +114,7 @@ int main(int argc, char** argv) { unique_devices.insert(dev.get_info()); #if !defined(ONEMATH_ENABLE_MKLCPU_BACKEND) && \ !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ - !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && \ - !defined(ONEMATH_ENABLE_NETLIB_BACKEND) + !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) && !defined(ONEMATH_ENABLE_NETLIB_BACKEND) if (dev.is_cpu()) continue; #endif @@ -125,24 +124,21 @@ int main(int argc, char** argv) { if (dev.is_gpu() && vendor_id == INTEL_ID) continue; #endif -#if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && \ - !defined(ONEMATH_ENABLE_CURAND_BACKEND) && \ - 
!defined(ONEMATH_ENABLE_CUSOLVER_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) && \ +#if !defined(ONEMATH_ENABLE_CUBLAS_BACKEND) && !defined(ONEMATH_ENABLE_CURAND_BACKEND) && \ + !defined(ONEMATH_ENABLE_CUSOLVER_BACKEND) && \ + !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) && \ !defined(ONEMATH_ENABLE_CUFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == NVIDIA_ID) continue; #endif -#if !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND) && \ - !defined(ONEMATH_ENABLE_ROCRAND_BACKEND) && \ - !defined(ONEMATH_ENABLE_ROCSOLVER_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU) && \ - !defined(ONEMATH_ENABLE_ROCFFT_BACKEND) && \ - !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) +#if !defined(ONEMATH_ENABLE_ROCBLAS_BACKEND) && !defined(ONEMATH_ENABLE_ROCRAND_BACKEND) && \ + !defined(ONEMATH_ENABLE_ROCSOLVER_BACKEND) && \ + !defined(ONEMATH_ENABLE_PORTBLAS_BACKEND_AMD_GPU) && \ + !defined(ONEMATH_ENABLE_ROCFFT_BACKEND) && !defined(ONEMATH_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == AMD_ID) continue; #endif -// clang-format off + // clang-format off #ifdef __HIPSYCL__ if (dev.is_accelerator()) #else diff --git a/tests/unit_tests/rng/device/include/moments.hpp b/tests/unit_tests/rng/device/include/moments.hpp index 661e85f32..7b360d5c3 100644 --- a/tests/unit_tests/rng/device/include/moments.hpp +++ b/tests/unit_tests/rng/device/include/moments.hpp @@ -53,15 +53,15 @@ class moments_test { Distribution, oneapi::math::rng::device::uniform< std::int32_t, oneapi::math::rng::device::uniform_method::accurate>> || - std::is_same_v> || std::is_same_v< Distribution, oneapi::math::rng::device::poisson< - std::int32_t, oneapi::math::rng::device::poisson_method::devroye>>)&&!queue - .get_device() - .has(sycl::aspect::fp64)) { + std::uint32_t, oneapi::math::rng::device::poisson_method::devroye>> || + std::is_same_v< + Distribution, + oneapi::math::rng::device::poisson< + std::int32_t, 
oneapi::math::rng::device::poisson_method::devroye>>) && + !queue.get_device().has(sycl::aspect::fp64)) { status = test_skipped; return; } diff --git a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp index ac646fdeb..5e373e2cf 100644 --- a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp +++ b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp @@ -34,10 +34,9 @@ #define N_GEN_SERVICE (N_ENGINES * N_PORTION) // defines for skip_ahead_ex tests -#define N_SKIP ((std::uint64_t)pow(2, 62)) -#define SKIP_TIMES ((std::int32_t)pow(2, 14)) -#define NUM_TO_SKIP \ - { 0, (std::uint64_t)pow(2, 12) } +#define N_SKIP ((std::uint64_t)pow(2, 62)) +#define SKIP_TIMES ((std::int32_t)pow(2, 14)) +#define NUM_TO_SKIP { 0, (std::uint64_t)pow(2, 12) } // Correctness checking. static inline bool check_equal_device(float x, float x_ref) { diff --git a/tests/unit_tests/rng/device/include/skip_ahead_test.hpp b/tests/unit_tests/rng/device/include/skip_ahead_test.hpp index fb8648248..775e57031 100644 --- a/tests/unit_tests/rng/device/include/skip_ahead_test.hpp +++ b/tests/unit_tests/rng/device/include/skip_ahead_test.hpp @@ -126,7 +126,7 @@ class skip_ahead_ex_test { size_t id = item.get_id(0); Engine engine(SEED); oneapi::math::rng::device::skip_ahead(engine, - { id * Engine::vec_size, skip_num }); + { id * Engine::vec_size, skip_num }); oneapi::math::rng::device::bits<> distr; auto res = oneapi::math::rng::device::generate(distr, engine); if constexpr (Engine::vec_size == 1) { diff --git a/tests/unit_tests/rng/device/moments/moments.cpp b/tests/unit_tests/rng/device/moments/moments.cpp index 6449b2d0c..a191b67df 100644 --- a/tests/unit_tests/rng/device/moments/moments.cpp +++ b/tests/unit_tests/rng/device/moments/moments.cpp @@ -1096,19 +1096,19 @@ TEST_P(Philox4x32x10BetaCjaDeviceMomentsTests, RealDoublePrecision) { } TEST_P(Philox4x32x10BetaCjaAccDeviceMomentsTests, 
RealSinglePrecision) { - rng_device_test, - oneapi::math::rng::device::beta>> + rng_device_test, + oneapi::math::rng::device::beta< + float, oneapi::math::rng::device::beta_method::cja_accurate>>> test1; EXPECT_TRUEORSKIP((test1(GetParam()))); - rng_device_test, - oneapi::math::rng::device::beta>> + rng_device_test, + oneapi::math::rng::device::beta< + float, oneapi::math::rng::device::beta_method::cja_accurate>>> test2; EXPECT_TRUEORSKIP((test2(GetParam()))); - rng_device_test, - oneapi::math::rng::device::beta>> + rng_device_test, + oneapi::math::rng::device::beta< + float, oneapi::math::rng::device::beta_method::cja_accurate>>> test3; EXPECT_TRUEORSKIP((test3(GetParam()))); } @@ -1148,19 +1148,19 @@ class Philox4x32x10GammaMarsagliaAccDeviceMomentsTests : public ::testing::TestWithParam {}; TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealSinglePrecision) { - rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + float, oneapi::math::rng::device::gamma_method::marsaglia>>> test1; EXPECT_TRUEORSKIP((test1(GetParam()))); - rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + float, oneapi::math::rng::device::gamma_method::marsaglia>>> test2; EXPECT_TRUEORSKIP((test2(GetParam()))); - rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + float, oneapi::math::rng::device::gamma_method::marsaglia>>> test3; EXPECT_TRUEORSKIP((test3(GetParam()))); } @@ -1168,19 +1168,19 @@ TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealSinglePrecision) { TEST_P(Philox4x32x10GammaMarsagliaDeviceMomentsTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(GetParam()); - rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + double, oneapi::math::rng::device::gamma_method::marsaglia>>> test1; EXPECT_TRUEORSKIP((test1(GetParam()))); - 
rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + double, oneapi::math::rng::device::gamma_method::marsaglia>>> test2; EXPECT_TRUEORSKIP((test2(GetParam()))); - rng_device_test, - oneapi::math::rng::device::gamma>> + rng_device_test, + oneapi::math::rng::device::gamma< + double, oneapi::math::rng::device::gamma_method::marsaglia>>> test3; EXPECT_TRUEORSKIP((test3(GetParam()))); } diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp index 15c32777b..3d3601553 100644 --- a/tests/unit_tests/rng/include/rng_test_common.hpp +++ b/tests/unit_tests/rng/include/rng_test_common.hpp @@ -34,10 +34,9 @@ #define N_GEN_SERVICE (N_ENGINES * N_PORTION) // defines for skip_ahead_ex tests -#define N_SKIP ((std::uint64_t)pow(2, 62)) -#define SKIP_TIMES ((std::int32_t)pow(2, 14)) -#define NUM_TO_SKIP \ - { 0, (std::uint64_t)pow(2, 12) } +#define N_SKIP ((std::uint64_t)pow(2, 62)) +#define SKIP_TIMES ((std::int32_t)pow(2, 14)) +#define NUM_TO_SKIP { 0, (std::uint64_t)pow(2, 12) } // Correctness checking. 
static inline bool check_equal(float x, float x_ref) { diff --git a/tests/unit_tests/rng/statistics_check/gaussian.cpp b/tests/unit_tests/rng/statistics_check/gaussian.cpp index 9b693c44d..8f33db53d 100644 --- a/tests/unit_tests/rng/statistics_check/gaussian.cpp +++ b/tests/unit_tests/rng/statistics_check/gaussian.cpp @@ -30,14 +30,14 @@ class GaussianBoxmullerTest : public ::testing::TestWithParam {}; class GaussianIcdfTest : public ::testing::TestWithParam {}; TEST_P(GaussianIcdfTest, RealSinglePrecision) { - rng_test< - statistics_test, - oneapi::math::rng::philox4x32x10>> + rng_test, + oneapi::math::rng::philox4x32x10>> test1; EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); - rng_test< - statistics_test, - oneapi::math::rng::mrg32k3a>> + rng_test, + oneapi::math::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_FLOAT))); } @@ -45,14 +45,14 @@ TEST_P(GaussianIcdfTest, RealSinglePrecision) { TEST_P(GaussianIcdfTest, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(GetParam()); - rng_test< - statistics_test, - oneapi::math::rng::philox4x32x10>> + rng_test, + oneapi::math::rng::philox4x32x10>> test1; EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); - rng_test< - statistics_test, - oneapi::math::rng::mrg32k3a>> + rng_test, + oneapi::math::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, GAUSSIAN_ARGS_DOUBLE))); } diff --git a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp old mode 100755 new mode 100644 index 44a1e6b4c..6927669b0 --- a/tests/unit_tests/rng/statistics_check/poisson_usm.cpp +++ b/tests/unit_tests/rng/statistics_check/poisson_usm.cpp @@ -30,13 +30,13 @@ class PoissonIcdfUsmTests : public ::testing::TestWithParam {}; TEST_P(PoissonIcdfUsmTests, IntegerPrecision) { rng_test, + oneapi::math::rng::poisson_method::gaussian_icdf_based>, oneapi::math::rng::philox4x32x10>> test1; 
EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS))); rng_test, + oneapi::math::rng::poisson_method::gaussian_icdf_based>, oneapi::math::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); @@ -45,13 +45,13 @@ TEST_P(PoissonIcdfUsmTests, IntegerPrecision) { TEST_P(PoissonIcdfUsmTests, UnsignedIntegerPrecision) { rng_test, + oneapi::math::rng::poisson_method::gaussian_icdf_based>, oneapi::math::rng::philox4x32x10>> test1; EXPECT_TRUEORSKIP((test1(GetParam(), N_GEN, POISSON_ARGS))); rng_test, + oneapi::math::rng::poisson_method::gaussian_icdf_based>, oneapi::math::rng::mrg32k3a>> test2; EXPECT_TRUEORSKIP((test2(GetParam(), N_GEN, POISSON_ARGS))); diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp index a02957905..584de713d 100644 --- a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -55,9 +55,9 @@ inline T opVal(const T t, const bool isConj) { }; template -void do_csr_transpose(const oneapi::math::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, - intType a_nrows, intType a_ncols, intType indexing, accIntType &ia, - accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { +void do_csr_transpose(const oneapi::math::transpose opA, intType* ia_t, intType* ja_t, fpType* a_t, + intType a_nrows, intType a_ncols, intType indexing, accIntType& ia, + accIntType& ja, accFpType& a, const bool structOnlyFlag = false) { const bool isConj = (opA == oneapi::math::transpose::conjtrans); // initialize ia_t to zero @@ -105,7 +105,7 @@ void do_csr_transpose(const oneapi::math::transpose opA, intType *ia_t, intType // Transpose the given sparse matrix if needed template -auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, +auto sparse_transpose_if_needed(const intType* ia, const intType* ja, const fpType* a, 
intType a_nrows, intType a_ncols, std::size_t nnz, intType indexing, oneapi::math::transpose transpose_val) { std::vector iopa; @@ -134,7 +134,7 @@ auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpTy /// Reduce the leading dimension to the minimum and transpose the matrix if needed /// The outputted matrix always uses row major layout template -auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, std::size_t ld, +auto extract_dense_matrix(const fpType* x, std::size_t nrows, std::size_t ncols, std::size_t ld, oneapi::math::transpose transpose_val, oneapi::math::layout dense_matrix_layout) { const bool is_row_major = dense_matrix_layout == oneapi::math::layout::row_major; @@ -161,8 +161,8 @@ auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, /// Convert the sparse matrix in the given format to a dense matrix A in row major layout applied with A_view. template -std::vector sparse_to_dense(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, std::size_t a_nrows, +std::vector sparse_to_dense(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, std::size_t a_nrows, std::size_t a_ncols, std::size_t nnz, intType indexing, oneapi::math::transpose transpose_val, oneapi::math::sparse::matrix_view A_view) { diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index a9b8841d9..d64e36de2 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -66,16 +66,16 @@ static std::vector> test_matrix_ oneapi::math::sparse::matrix_property::symmetric } }; -void print_error_code(sycl::exception const &e); +void print_error_code(sycl::exception const& e); // Catch asynchronous exceptions. 
struct exception_handler_t { void operator()(sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; print_error_code(e); } @@ -86,7 +86,7 @@ struct exception_handler_t { struct UsmDeleter { sycl::queue q; UsmDeleter(sycl::queue _q) : q(_q) {} - void operator()(void *ptr) { + void operator()(void* ptr) { sycl::free(ptr, q); } }; @@ -99,14 +99,14 @@ auto malloc_device_uptr(sycl::queue q, std::size_t num_elts) { // SYCL buffer creation helper. template -sycl::buffer make_buffer(const vec &v) { +sycl::buffer make_buffer(const vec& v) { sycl::buffer buf(v.data(), sycl::range<1>(v.size())); return buf; } template -void copy_host_to_buffer(sycl::queue queue, const std::vector &src, sycl::buffer dst) { - queue.submit([&](sycl::handler &cgh) { +void copy_host_to_buffer(sycl::queue queue, const std::vector& src, sycl::buffer dst) { + queue.submit([&](sycl::handler& cgh) { auto dst_acc = dst.template get_access( cgh, sycl::range<1>(src.size())); cgh.copy(src.data(), dst_acc); @@ -168,7 +168,7 @@ struct rand_scalar> { }; template -void rand_vector(std::vector &v, std::size_t n) { +void rand_vector(std::vector& v, std::size_t n) { using fpRealType = typename complex_info::real_type; v.resize(n); rand_scalar rand; @@ -178,7 +178,7 @@ void rand_vector(std::vector &v, std::size_t n) { } template -void rand_matrix(std::vector &m, oneapi::math::layout layout_val, std::size_t nrows, +void rand_matrix(std::vector& m, oneapi::math::layout layout_val, std::size_t nrows, std::size_t ncols, std::size_t ld, oneapi::math::transpose transpose_val = oneapi::math::transpose::nontrans) { using fpRealType = typename complex_info::real_type; @@ -221,8 +221,8 @@ fpType generate_data(bool is_diag) { template intType 
generate_random_csr_matrix(const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = false) { intType nnz = 0; rand_scalar rand_density; @@ -272,8 +272,8 @@ intType generate_random_csr_matrix(const intType nrows, const intType ncols, template intType generate_random_coo_matrix(const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = false) { rand_scalar rand_density; @@ -315,8 +315,8 @@ intType generate_random_coo_matrix(const intType nrows, const intType ncols, template intType generate_random_matrix(sparse_matrix_format_t format, const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = false) { ia.clear(); ja.clear(); @@ -337,8 +337,8 @@ intType generate_random_matrix(sparse_matrix_format_t format, const intType nrow /// In CSR format, the elements within a row are shuffled without changing ia. /// In COO format, all the elements are shuffled. 
template -void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType *ia, - intType *ja, fpType *a, intType nnz, std::size_t nrows) { +void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType* ia, + intType* ja, fpType* a, intType nnz, std::size_t nrows) { if (format == sparse_matrix_format_t::CSR) { for (std::size_t i = 0; i < nrows; ++i) { intType nnz_row = ia[i + 1] - ia[i]; @@ -361,14 +361,14 @@ void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intT } else { throw oneapi::math::exception("sparse_blas", "shuffle_sparse_matrix", - "Internal error: unsupported format"); + "Internal error: unsupported format"); } } /// Initialize a sparse matrix specified by the given format template -void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, - oneapi::math::sparse::matrix_handle_t *p_smhandle, std::int64_t num_rows, +void init_sparse_matrix(sycl::queue& queue, sparse_matrix_format_t format, + oneapi::math::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { if (format == sparse_matrix_format_t::CSR) { @@ -381,33 +381,33 @@ void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, } else { throw oneapi::math::exception("sparse_blas", "init_sparse_matrix", - "Internal error: unsupported format"); + "Internal error: unsupported format"); } } /// Reset the data of a sparse matrix specified by the given format template -void set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, +void set_matrix_data(sycl::queue& queue, sparse_matrix_format_t format, oneapi::math::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::math::index_base index, ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { if (format == sparse_matrix_format_t::CSR) { 
- CALL_RT_OR_CT(oneapi::math::sparse::set_csr_matrix_data, queue, smhandle, num_rows, num_cols, - nnz, index, rows, cols, vals); + CALL_RT_OR_CT(oneapi::math::sparse::set_csr_matrix_data, queue, smhandle, num_rows, + num_cols, nnz, index, rows, cols, vals); } else if (format == sparse_matrix_format_t::COO) { - CALL_RT_OR_CT(oneapi::math::sparse::set_coo_matrix_data, queue, smhandle, num_rows, num_cols, - nnz, index, rows, cols, vals); + CALL_RT_OR_CT(oneapi::math::sparse::set_coo_matrix_data, queue, smhandle, num_rows, + num_cols, nnz, index, rows, cols, vals); } else { throw oneapi::math::exception("sparse_blas", "set_matrix_data", - "Internal error: unsupported format"); + "Internal error: unsupported format"); } } template -inline void free_handles(sycl::queue &queue, const std::vector dependencies, - HandlesT &&... handles) { +inline void free_handles(sycl::queue& queue, const std::vector dependencies, + HandlesT&&... handles) { // Fold expression so that handles expands to each value one after the other. ( [&] { @@ -436,19 +436,19 @@ inline void free_handles(sycl::queue &queue, const std::vector depe } template -inline void free_handles(sycl::queue &queue, HandlesT &&... handles) { +inline void free_handles(sycl::queue& queue, HandlesT&&... handles) { free_handles(queue, {}, handles...); } template -inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&... handles) { +inline void wait_and_free_handles(sycl::queue& queue, HandlesT&&... 
handles) { queue.wait(); free_handles(queue, handles...); } inline bool require_square_matrix( oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties) { + const std::set& matrix_properties) { const bool is_symmetric = matrix_properties.find(oneapi::math::sparse::matrix_property::symmetric) != matrix_properties.cend(); @@ -457,7 +457,7 @@ inline bool require_square_matrix( template bool check_equal(fpType x, fpType x_ref, double abs_error_margin, double rel_error_margin, - std::ostream &out) { + std::ostream& out) { using fpRealType = typename complex_info::real_type; static_assert(std::is_floating_point_v, "Expected floating-point real or complex type."); @@ -478,8 +478,8 @@ bool check_equal(fpType x, fpType x_ref, double abs_error_margin, double rel_err } template -bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_error_factor = 10.0, - double rel_error_factor = 200.0, std::ostream &out = std::cout) { +bool check_equal_vector(const vecType1& v, const vecType2& v_ref, double abs_error_factor = 10.0, + double rel_error_factor = 200.0, std::ostream& out = std::cout) { using T = typename vecType2::value_type; std::size_t n = v.size(); if (n != v_ref.size()) { @@ -492,7 +492,7 @@ bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_err auto max_norm_ref = *std::max_element(std::begin(v_ref), std::end(v_ref), - [](const T &a, const T &b) { return std::abs(a) < std::abs(b); }); + [](const T& a, const T& b) { return std::abs(a) < std::abs(b); }); // Heuristic for the average-case error margins double abs_error_margin = abs_error_factor * std::abs(max_norm_ref) * std::log2(static_cast(n)); diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index e34910521..83fa00d99 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -53,11 +53,11 @@ */ template void 
test_helper_with_format_with_transpose( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, - oneapi::math::transpose transpose_A, oneapi::math::transpose transpose_B, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, + oneapi::math::transpose transpose_A, oneapi::math::transpose transpose_B, int& num_passed, + int& num_skipped) { double density_A_matrix = 0.8; fpType fp_zero = set_fp_value()(0.f, 0.f); fpType fp_one = set_fp_value()(1.f, 0.f); @@ -217,13 +217,13 @@ void test_helper_with_format_with_transpose( */ template void test_helper_with_format( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, int& num_passed, + int& num_skipped) { std::vector transpose_vals{ oneapi::math::transpose::nontrans, - oneapi::math::transpose::trans, - oneapi::math::transpose::conjtrans }; + oneapi::math::transpose::trans, + oneapi::math::transpose::conjtrans }; for (auto transpose_A : transpose_vals) { for (auto transpose_B : transpose_vals) { test_helper_with_format_with_transpose( @@ -245,7 +245,7 @@ void test_helper_with_format( */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, int &num_passed, int &num_skipped) { + sycl::device* dev, int& num_passed, int& num_skipped) { test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, { oneapi::math::sparse::spmm_alg::no_optimize_alg, oneapi::math::sparse::spmm_alg::csr_alg1, @@ -261,14 +261,14 @@ void 
test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spmm reference as a dense operation template -void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, intType a_nrows, +void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, intType a_nrows, intType a_ncols, intType c_ncols, intType a_nnz, intType indexing, oneapi::math::layout dense_matrix_layout, oneapi::math::transpose opA, oneapi::math::transpose opB, fpType alpha, fpType beta, intType ldb, intType ldc, - const fpType *b, oneapi::math::sparse::matrix_view A_view, - fpType *c_ref) { + const fpType* b, oneapi::math::sparse::matrix_view A_view, + fpType* c_ref) { std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); std::size_t c_ncols_u = static_cast(c_ncols); @@ -286,7 +286,7 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i // Return the linear index to access a dense matrix from auto dense_linear_idx = [=](std::size_t row, std::size_t col, std::size_t ld) { return (dense_matrix_layout == oneapi::math::layout::row_major) ? 
row * ld + col - : col * ld + row; + : col * ld + row; }; // @@ -300,7 +300,7 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i for (std::size_t i = 0; i < opa_ncols; i++) { acc += dense_opa[row * opa_ncols + i] * dense_opb[i * c_ncols_u + col]; } - fpType &c = c_ref[dense_linear_idx(row, col, ldc_u)]; + fpType& c = c_ref[dense_linear_idx(row, col, ldc_u)]; c = alpha * acc + beta * c; } } diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 848180c28..d44e8ba7e 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -52,10 +52,10 @@ */ template void test_helper_with_format_with_transpose( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, - oneapi::math::transpose transpose_val, int &num_passed, int &num_skipped) { + const std::vector& non_default_algorithms, + oneapi::math::transpose transpose_val, int& num_passed, int& num_skipped) { double density_A_matrix = 0.8; fpType fp_zero = set_fp_value()(0.f, 0.f); fpType fp_one = set_fp_value()(1.f, 0.f); @@ -212,13 +212,13 @@ void test_helper_with_format_with_transpose( */ template void test_helper_with_format( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, int& num_passed, + int& num_skipped) { std::vector transpose_vals{ oneapi::math::transpose::nontrans, - oneapi::math::transpose::trans, - oneapi::math::transpose::conjtrans }; + oneapi::math::transpose::trans, + 
oneapi::math::transpose::conjtrans }; for (auto transpose_A : transpose_vals) { test_helper_with_format_with_transpose(test_functor_i32, test_functor_i64, dev, format, non_default_algorithms, transpose_A, @@ -238,7 +238,7 @@ void test_helper_with_format( */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, int &num_passed, int &num_skipped) { + sycl::device* dev, int& num_passed, int& num_skipped) { test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, { oneapi::math::sparse::spmv_alg::no_optimize_alg, oneapi::math::sparse::spmv_alg::csr_alg1, @@ -253,12 +253,12 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spmv reference as a dense operation template -void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, intType a_nrows, +void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, intType a_nrows, intType a_ncols, intType a_nnz, intType indexing, oneapi::math::transpose opA, fpType alpha, fpType beta, - const fpType *x, oneapi::math::sparse::matrix_view A_view, - fpType *y_ref) { + const fpType* x, oneapi::math::sparse::matrix_view A_view, + fpType* y_ref) { std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp b/tests/unit_tests/sparse_blas/include/test_spsv.hpp index 16ffe4484..d3937997f 100644 --- a/tests/unit_tests/sparse_blas/include/test_spsv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp @@ -48,16 +48,18 @@ */ template void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, sparse_matrix_format_t format, - oneapi::math::transpose 
transpose_val, int &num_passed, - int &num_skipped) { + sycl::device* dev, sparse_matrix_format_t format, + oneapi::math::transpose transpose_val, int& num_passed, + int& num_skipped) { double density_A_matrix = 0.144; fpType alpha = set_fp_value()(1.f, 0.f); int m = 277; oneapi::math::index_base index_zero = oneapi::math::index_base::zero; oneapi::math::sparse::spsv_alg default_alg = oneapi::math::sparse::spsv_alg::default_alg; - oneapi::math::sparse::spsv_alg no_optimize_alg = oneapi::math::sparse::spsv_alg::no_optimize_alg; - oneapi::math::sparse::matrix_view default_A_view(oneapi::math::sparse::matrix_descr::triangular); + oneapi::math::sparse::spsv_alg no_optimize_alg = + oneapi::math::sparse::spsv_alg::no_optimize_alg; + oneapi::math::sparse::matrix_view default_A_view( + oneapi::math::sparse::matrix_descr::triangular); oneapi::math::sparse::matrix_view upper_A_view(oneapi::math::sparse::matrix_descr::triangular); upper_A_view.uplo_view = oneapi::math::uplo::upper; std::set no_properties; @@ -158,8 +160,8 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, oneapi::math::transpose transpose_val, int &num_passed, - int &num_skipped) { + sycl::device* dev, oneapi::math::transpose transpose_val, int& num_passed, + int& num_skipped) { test_helper_with_format(test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, transpose_val, num_passed, num_skipped); @@ -170,11 +172,11 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spsv reference as a dense operation template -void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, intType m, intType nnz, - intType indexing, oneapi::math::transpose opA, const fpType *x, +void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType* ia, + 
const intType* ja, const fpType* a, intType m, intType nnz, + intType indexing, oneapi::math::transpose opA, const fpType* x, fpType alpha, oneapi::math::sparse::matrix_view A_view, - fpType *y_ref) { + fpType* y_ref) { std::size_t mu = static_cast(m); auto dense_opa = sparse_to_dense(format, ia, ja, a, mu, mu, static_cast(nnz), indexing, opA, A_view); @@ -186,8 +188,8 @@ void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType *i // // Compute each element of the reference one after the other starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix. // A matrix is considered lowered if it is lower and not transposed or upper and transposed. - const bool is_lower = - (A_view.uplo_view == oneapi::math::uplo::lower) == (opA == oneapi::math::transpose::nontrans); + const bool is_lower = (A_view.uplo_view == oneapi::math::uplo::lower) == + (opA == oneapi::math::transpose::nontrans); for (std::size_t row = 0; row < mu; row++) { std::size_t uplo_row = is_lower ? 
row : (mu - 1 - row); fpType rhs = alpha * x[uplo_row]; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index 330b06bba..1a97c313e 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -23,18 +23,18 @@ #include "test_spmm.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmm(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C, double density_A_matrix, oneapi::math::index_base index, oneapi::math::layout dense_matrix_layout, oneapi::math::transpose transpose_A, oneapi::math::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -92,7 +92,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, ja_buf, a_buf); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, opb_ncols, ldb, dense_matrix_layout, b_buf); @@ -154,13 +155,13 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, A_view, A_handle, B_handle, &beta, C_handle, alg, descr); } } - catch (const sycl::exception &e) { + catch 
(const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); if (descr) { sycl::event ev_release_descr; @@ -170,7 +171,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; return 0; } @@ -190,7 +191,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmmBufferTests : public ::testing::TestWithParam {}; +class SparseSpmmBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmmBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index 334604a5a..190b63c28 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -23,18 +23,18 @@ #include "test_spmm.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmm(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C, double density_A_matrix, oneapi::math::index_base index, oneapi::math::layout dense_matrix_layout, oneapi::math::transpose transpose_A, oneapi::math::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, oneapi::math::sparse::spmm_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const 
std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); @@ -82,11 +82,11 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *b_usm = b_usm_uptr.get(); - fpType *c_usm = c_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm = a_usm_uptr.get(); + fpType* b_usm = b_usm_uptr.get(); + fpType* c_usm = c_usm_uptr.get(); std::vector mat_dependencies; std::vector spmm_dependencies; @@ -102,8 +102,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmm_dependencies.push_back( main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α - fpType *beta_host_or_usm_ptr = β + fpType* alpha_host_or_usm_ptr = α + fpType* beta_host_or_usm_ptr = β if (test_scalar_on_device) { spmm_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -122,7 +122,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, ja_usm, a_usm); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, opb_ncols, ldb, dense_matrix_layout, b_usm); @@ -192,20 +193,20 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, 
workspace_usm.get(), mat_dependencies); - CALL_RT_OR_CT(ev_spmm = oneapi::math::sparse::spmm, main_queue, transpose_A, transpose_B, - &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, - { ev_opt }); + CALL_RT_OR_CT(ev_spmm = oneapi::math::sparse::spmm, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, { ev_opt }); } ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), ev_spmm); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); if (descr) { sycl::event ev_release_descr; @@ -215,7 +216,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; return 0; } @@ -238,7 +239,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmmUsmTests : public ::testing::TestWithParam {}; +class SparseSpmmUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmmUsmTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 0c4136bcc..76700d483 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -23,16 +23,16 @@ #include "test_spmv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmv(sycl::device *dev, 
sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmv(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, double density_A_matrix, oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha, fpType beta, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -85,7 +85,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, ja_buf, a_buf); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, static_cast(x_host.size()), x_buf); @@ -99,8 +100,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); - CALL_RT_OR_CT(oneapi::math::sparse::spmv_optimize, main_queue, transpose_val, &alpha, A_view, - A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); + CALL_RT_OR_CT(oneapi::math::sparse::spmv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); CALL_RT_OR_CT(oneapi::math::sparse::spmv, main_queue, transpose_val, &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr); @@ -143,13 +144,13 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, A_handle, x_handle, &beta, y_handle, alg, descr); } } - catch 
(const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -159,7 +160,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; return 0; } @@ -178,7 +179,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmvBufferTests : public ::testing::TestWithParam {}; +class SparseSpmvBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmvBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index a3265ea70..f7a9c9960 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -23,16 +23,16 @@ #include "test_spmv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmv(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, double density_A_matrix, oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha, fpType beta, oneapi::math::sparse::spmv_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { 
sycl::queue main_queue(*dev, exception_handler_t()); @@ -75,11 +75,11 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm = a_usm_uptr.get(); + fpType* x_usm = x_usm_uptr.get(); + fpType* y_usm = y_usm_uptr.get(); std::vector mat_dependencies; std::vector spmv_dependencies; @@ -95,8 +95,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α - fpType *beta_host_or_usm_ptr = β + fpType* alpha_host_or_usm_ptr = α + fpType* beta_host_or_usm_ptr = β if (test_scalar_on_device) { spmv_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -115,7 +115,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, ja_usm, a_usm); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, static_cast(x_host.size()), x_usm); @@ -191,13 +192,13 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spmv); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout 
<< "Caught synchronous SYCL exception during sparse SPMV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -207,7 +208,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; return 0; } @@ -229,7 +230,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmvUsmTests : public ::testing::TestWithParam {}; +class SparseSpmvUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmvUsmTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp index 62a84120a..286558834 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -23,15 +23,15 @@ #include "test_spsv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, +int test_spsv(sycl::device* dev, sparse_matrix_format_t format, intType m, double density_A_matrix, oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha, oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -87,7 
+87,8 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl try { init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, m, x_buf); CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle, m, y_buf); @@ -99,8 +100,8 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size); sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); - CALL_RT_OR_CT(oneapi::math::sparse::spsv_optimize, main_queue, transpose_val, &alpha, A_view, - A_handle, x_handle, y_handle, alg, descr, workspace_buf); + CALL_RT_OR_CT(oneapi::math::sparse::spsv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_buf); CALL_RT_OR_CT(oneapi::math::sparse::spsv, main_queue, transpose_val, &alpha, A_view, A_handle, x_handle, y_handle, alg, descr); @@ -141,13 +142,13 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl A_handle, x_handle, y_handle, alg, descr); } } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -157,7 +158,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl } return test_skipped; } - catch (const std::runtime_error &error) { + 
catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; return 0; } @@ -176,7 +177,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl return static_cast(valid); } -class SparseSpsvBufferTests : public ::testing::TestWithParam {}; +class SparseSpsvBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpsvBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp index 1555bd2c0..da1456daa 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -23,15 +23,15 @@ #include "test_spsv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, +int test_spsv(sycl::device* dev, sparse_matrix_format_t format, intType m, double density_A_matrix, oneapi::math::index_base index, oneapi::math::transpose transpose_val, fpType alpha, oneapi::math::sparse::spsv_alg alg, oneapi::math::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); @@ -77,11 +77,11 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm = a_usm_uptr.get(); + fpType* x_usm = x_usm_uptr.get(); + 
fpType* y_usm = y_usm_uptr.get(); std::vector mat_dependencies; std::vector spsv_dependencies; @@ -97,7 +97,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl spsv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α + fpType* alpha_host_or_usm_ptr = α if (test_scalar_on_device) { spsv_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -113,7 +113,8 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl try { init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); for (auto property : matrix_properties) { - CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, property); + CALL_RT_OR_CT(oneapi::math::sparse::set_matrix_property, main_queue, A_handle, + property); } CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &x_handle, m, x_usm); CALL_RT_OR_CT(oneapi::math::sparse::init_dense_vector, main_queue, &y_handle, m, y_usm); @@ -186,13 +187,13 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spsv); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::math::unimplemented &e) { + catch (const oneapi::math::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -202,7 +203,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() 
<< std::endl; return 0; } @@ -224,7 +225,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl return static_cast(valid); } -class SparseSpsvUsmTests : public ::testing::TestWithParam {}; +class SparseSpsvUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpsvUsmTests, RealSinglePrecision) { using fpType = float;